{"module": "keras_hub.src.models.gemma3.gemma3_causal_lm", "class_name": "Gemma3CausalLM", "config": {"backbone": {"module": "keras_hub.src.models.gemma3.gemma3_backbone", "class_name": "Gemma3Backbone", "config": {"name": "gemma3_backbone", "trainable": true, "vocabulary_size": 262144, "image_size": null, "num_layers": 34, "num_query_heads": 8, "num_key_value_heads": 4, "hidden_dim": 2560, "intermediate_dim": 10240, "head_dim": 256, "query_head_dim_normalize": true, "use_query_key_norm": true, "use_post_ffw_norm": true, "use_post_attention_norm": true, "attention_logit_soft_cap": null, "final_logit_soft_cap": null, "use_sliding_window_attention": true, "sliding_window_size": 1024, "local_rope_scaling_factor": 1.0, "global_rope_scaling_factor": 8.0, "vision_encoder": null, "layer_norm_epsilon": 1e-06, "dropout": 0}, "registered_name": "keras_hub>Gemma3Backbone", "build_config": {"input_shape": null}, "compile_config": {}}, "preprocessor": {"module": "keras_hub.src.models.gemma3.gemma3_causal_lm_preprocessor", "class_name": "Gemma3CausalLMPreprocessor", "config": {"name": "gemma3_causal_lm_preprocessor", "trainable": true, "dtype": {"module": "keras", "class_name": "DTypePolicy", "config": {"name": "float32"}, "registered_name": null}, "tokenizer": {"module": "keras_hub.src.models.gemma3.gemma3_tokenizer", "class_name": "Gemma3Tokenizer", "config": {"name": "gemma3_tokenizer", "trainable": true, "dtype": {"module": "keras", "class_name": "DTypePolicy", "config": {"name": "int32"}, "registered_name": null}, "config_file": "tokenizer.json", "proto": null, "sequence_length": null, "add_bos": false, "add_eos": false}, "registered_name": "keras_hub>Gemma3Tokenizer"}, "config_file": "preprocessor.json", "sequence_length": 512, "add_start_token": true, "add_end_token": true, "num_vision_tokens_per_image": 0, "max_images_per_prompt": 0}, "registered_name": "keras_hub>Gemma3CausalLMPreprocessor"}, "name": "gemma3_causal_lm"}, "registered_name": "keras_hub>Gemma3CausalLM", 
"build_config": {"input_shape": null}, "compile_config": {"optimizer": {"module": "keras.optimizers", "class_name": "AdamW", "config": {"name": "adamw", "learning_rate": 9.999999747378752e-06, "weight_decay": 0.001, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "loss_scale_factor": null, "gradient_accumulation_steps": null, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}, "registered_name": null}, "loss": {"module": "keras.losses", "class_name": "SparseCategoricalCrossentropy", "config": {"name": "sparse_categorical_crossentropy", "reduction": "sum_over_batch_size", "from_logits": true, "ignore_class": null}, "registered_name": null}, "loss_weights": null, "metrics": null, "weighted_metrics": [{"module": "keras.metrics", "class_name": "SparseCategoricalAccuracy", "config": {"name": "sparse_categorical_accuracy", "dtype": "float32"}, "registered_name": null}], "run_eagerly": false, "steps_per_execution": 1, "jit_compile": true}}