---
# Training configuration for the MLP weight autoencoder (encoder-decoder run).
# Reconstructed into valid block-style YAML from a whitespace-flattened dump;
# keys within each mapping are kept in their original alphabetical order.

architecture:
  arch_encoder:
    embed_dim: 64
    max_layers: 6
    max_neurons: 8
  latent_dim: 256
  transformer:
    decoder:
      activation: gelu
      d_model: 512
      dim_feedforward: 2048
      dropout: 0.025
      num_heads: 8
      num_layers: 6
      num_memory_tokens: 8
    encoder:
      activation: gelu
      d_model: 512
      dim_feedforward: 2048
      dropout: 0.025
      num_heads: 8
      num_layers: 10
      pooling: mean
      positional_encoding: learned
      use_positional_encoding: true
  type: transformer

dataloader:
  num_workers: 0
  pin_memory: true

dataset:
  hf_dataset: maximuspowers/muat-fourier-5
  input_mode: signature
  max_dimensions:
    max_hidden_layers: 6
    max_neurons_per_layer: 8
    max_sequence_length: 5
    # NOTE(review): neuron_profile placed under max_dimensions per the file's
    # alphabetical key ordering — confirm against the config schema.
    neuron_profile:
      features_per_neuron: 5
  methods:
    - fourier
  random_seed: 42
  test_split: 0.1
  train_split: 0.8
  val_split: 0.1

device:
  type: auto

evaluation:
  metrics:
    - mse
    - mae
    - rmse
    - cosine_similarity
    - relative_error
    - r2_score
  per_layer_metrics: false

hub:
  enabled: true
  private: false
  push_logs: true
  push_metrics: true
  push_model: true
  repo_id: maximuspowers/weight-autoencoder-mlp-v1
  # No token committed here (null) — presumably supplied via environment at runtime.
  token: null

logging:
  checkpoint:
    enabled: true
    mode: min
    monitor: val_loss
    save_best_only: true
  tensorboard:
    auto_launch: true
    enabled: true
    log_interval: 10
    port: 6006
  visualizations:
    enabled: true
    log_interval: 1
    num_image_samples: 4
    # NOTE(review): verbose placed under visualizations per the alphabetical
    # key ordering — confirm it isn't meant to sit at the logging level.
    verbose: true

loss:
  contrastive:
    enabled: false
    projection_head:
      hidden_dim: 128
      input_dim: 256
      output_dim: 64
    temperature: 0.1
    weight: 0.05
  functional:
    benchmark_path: /Users/max/Desktop/muat/model_zoo/configs/autoencoder/benchmark_dataset.json
    enabled: true
    test_samples: null
    weight: 0.05
  reconstruction:
    enabled: true
    type: mse
    weight: 0.6
  variance:
    enabled: false
    target_variance: 0.01
    weight: 0.1

# NOTE(review): absolute user-local paths (run_dir, benchmark_path) will not
# resolve on other machines — consider making them relative or configurable.
run_dir: /Users/max/Desktop/muat/model_zoo/runs/train-encoder-decoder_config_2025-12-20_12-55-25
run_log_cleanup: false

tokenization:
  chunk_size: 1
  granularity: neuron
  include_metadata: false
  max_tokens: 64

training:
  batch_size: 32
  early_stopping:
    enabled: true
    mode: min
    monitor: val_loss
    patience: 50
  epochs: 1000
  gradient_accumulation_steps: 4
  learning_rate: 0.0001
  lr_scheduler:
    enabled: true
    factor: 0.5
    min_lr: 1.0e-06
    patience: 5
  max_grad_norm: 1.0
  optimizer: adamw
  weight_decay: 0.0001