Nharen
/

Reward_Rush_DQN_Cart_Pole

@@ -21,24 +21,31 @@ model-index:
       value: 100
 ---
-### Model Architectures
-#### CartPole-v1 (DQN)
-* **File:** `Cartpole.pth`
-* **Algorithm:** Deep Q-Network
-* **Input:** 4 discrete observations
-* **Output:** 2 discrete actions (Left/Right)
 * **Network Structure:**
-    * **Input Layer:** 4 -> 128 (Linear)
-    * **Activation:** ReLU
-    * **Hidden Layer:** 128 -> 128 (Linear)
-    * **Activation:** ReLU
-    * **Output Layer:** 128 -> 2 (Linear)
-### Test Code
-```
 import torch
 import torch.nn as nn
 import gymnasium as gym
@@ -46,29 +53,32 @@ import numpy as np
 from huggingface_hub import hf_hub_download
 class MatchedNet(nn.Module):
-    def __init__(self, n_observations=4, n_actions=2):
-        super(MatchedNet, self).__init__()
-        self.layer1 = nn.Linear(n_observations, 128)
         self.layer2 = nn.Linear(128, 128)
-        self.layer3 = nn.Linear(128, n_actions)
     def forward(self, x):
         x = torch.relu(self.layer1(x))
         x = torch.relu(self.layer2(x))
         return self.layer3(x)
-def run_test():
-    repo_id = "Nharen/Reward_Rush_DQN_Cart_Pole"
-    path = hf_hub_download(repo_id=repo_id, filename="Cartpole.pth")
     model = MatchedNet()
     state_dict = torch.load(path, map_location='cpu', weights_only=True)
     model.load_state_dict(state_dict)
     model.eval()
     env = gym.make("CartPole-v1")
-    rewards = []
     for _ in range(100):
         state, _ = env.reset()
         episode_reward = 0
@@ -76,17 +86,16 @@ def run_test():
         while not done:
             state_t = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
             with torch.no_grad():
-                action = model(state_t).max(1)[1].item()
             state, reward, terminated, truncated, _ = env.step(action)
             episode_reward += reward
             done = terminated or truncated
-        rewards.append(episode_reward)
-    print(f"Average Reward: {np.mean(rewards):.2f}")
     env.close()
 if __name__ == "__main__":
-    run_test()
 ```

       value: 100
 ---
+# Reward Rush: CartPole DQN
+This repository contains the cleaned weights for a Deep Q-Network agent trained for the CartPole-v1 environment.
+## Model Architecture
+The model utilizes a multi-layer perceptron structure designed for low-latency inference:
+* **Input:** 4 state observations
+* **Output:** 2 discrete actions
 * **Network Structure:**
+    * Linear(4, 128) -> ReLU
+    * Linear(128, 128) -> ReLU
+    * Linear(128, 2)
+## Common Implementation Mistakes to Avoid
+1. **Variable Naming:** The checkpoint keys are tied to the attribute names `layer1`, `layer2`, and `layer3`. Renaming the layers (e.g. `fc1`) or wrapping them in `nn.Sequential` changes the state-dict keys, so `load_state_dict` will fail with a key-mismatch error.
+2. **Missing Batch Dimension:** The inference code expects a leading batch dimension. A single 4-element state must be expanded to shape `(1, 4)` with `unsqueeze(0)` before it is passed to the model.
+3. **Inference Logic:** The model outputs raw Q-values, one per action. Use `argmax(dim=1)` to pick the index of the highest Q-value, which is the action to pass to the environment.
+## Download and Test Code
+This script downloads the weights from the Hugging Face repository, initializes the environment, and evaluates the agent over 100 test episodes.
+```python
 import torch
 import torch.nn as nn
 import gymnasium as gym
 from huggingface_hub import hf_hub_download
 class MatchedNet(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.layer1 = nn.Linear(4, 128)
         self.layer2 = nn.Linear(128, 128)
+        self.layer3 = nn.Linear(128, 2)
     def forward(self, x):
         x = torch.relu(self.layer1(x))
         x = torch.relu(self.layer2(x))
         return self.layer3(x)
+def run_cartpole_test():
+    path = hf_hub_download(repo_id="Nharen/Reward_Rush_DQN_Cart_Pole", filename="Cartpole.pth")
     model = MatchedNet()
     state_dict = torch.load(path, map_location='cpu', weights_only=True)
+    if isinstance(state_dict, dict) and "policy_net_state_dict" in state_dict:
+        state_dict = state_dict["policy_net_state_dict"]
     model.load_state_dict(state_dict)
     model.eval()
     env = gym.make("CartPole-v1")
+    total_rewards = []
     for _ in range(100):
         state, _ = env.reset()
         episode_reward = 0
         while not done:
             state_t = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
             with torch.no_grad():
+                action = model(state_t).argmax(dim=1).item()
             state, reward, terminated, truncated, _ = env.step(action)
             episode_reward += reward
             done = terminated or truncated
+        total_rewards.append(episode_reward)
+    print(f"Average Reward: {np.mean(total_rewards)}")
     env.close()
 if __name__ == "__main__":
+    run_cartpole_test()
 ```