diff --git a/models/__pycache__/consciousness.cpython-310.pyc b/models/__pycache__/consciousness.cpython-310.pyc
index 7c9e36e..0cec44a 100644
Binary files a/models/__pycache__/consciousness.cpython-310.pyc and b/models/__pycache__/consciousness.cpython-310.pyc differ
diff --git a/models/__pycache__/ethical_safety.cpython-310.pyc b/models/__pycache__/ethical_safety.cpython-310.pyc
new file mode 100644
index 0000000..6c73a5f
Binary files /dev/null and b/models/__pycache__/ethical_safety.cpython-310.pyc differ
diff --git a/models/consciousness.py b/models/consciousness.py
index d89dbb9..d381459 100644
--- a/models/consciousness.py
+++ b/models/consciousness.py
@@ -9,6 +9,7 @@
 from .simulated_emotions import SimulatedEmotions
 from .global_workspace import GlobalWorkspace # Ensure this import is present
 from .intentionality import IntentionalityModule # Add this import
+from .ethical_safety import EthicalSafety # Add import
 
 class ConsciousnessModel(nn.Module):
     """
@@ -112,6 +113,9 @@ def __init__(self, hidden_dim: int, num_heads: int, num_layers: int, num_states:
             num_actions=hidden_dim # Set to match hidden_dim
         )
 
+        # Add ethical safety module
+        self.ethical_safety = EthicalSafety(hidden_dim=hidden_dim)
+
     def get_config(self):
         return {
             'hidden_dim': self.hidden_dim,
@@ -211,34 +215,49 @@ def forward(self, inputs=None, **kwargs) -> Tuple[Dict[str, torch.Tensor], Dict[
         workspace_output = self.global_workspace(remaining_inputs)
 
+        # Project broadcasted state first
+        broadcasted = workspace_output['broadcasted']
+        if (broadcasted.dim() == 3):
+            broadcasted = broadcasted.mean(dim=1) # [batch_size, hidden_dim]
+        broadcasted_proj = self.broadcasted_projection(broadcasted)
+
         # Get emotional state and ensure proper shape
         emotional_state, emotion_metrics = self.emotional_processor(workspace_output['broadcasted'])
 
         # Process memory retrieval
         retrieved_memory = self.memory_retrieval(workspace_output['broadcasted'])
+
         # Calculate emotional influence - should match broadcasted shape
         emotional_influence = self.emotion_integration(
             torch.cat([workspace_output['broadcasted'], emotional_state], dim=-1)
         )
 
-        # Process intentionality
-        intentionality_results = self.intentionality_module(workspace_output['broadcasted'], self.goal_state)
-        intentionality_output = intentionality_results['actions'] # Should now be [batch_size, hidden_dim]
-
-        # Project each component to same dimension, ensuring proper shapes
-        broadcasted = workspace_output['broadcasted']
-        if (broadcasted.dim() == 3):
-            broadcasted = broadcasted.mean(dim=1) # [batch_size, hidden_dim]
-        broadcasted_proj = self.broadcasted_projection(broadcasted)
-
         if (emotional_influence.dim() == 3):
             emotional_influence = emotional_influence.mean(dim=1)
         emotional_proj = self.emotional_projection(emotional_influence)
 
-        # Ensure intentionality output has correct shape
+        # Process intentionality
+        intentionality_results = self.intentionality_module(workspace_output['broadcasted'], self.goal_state)
+        intentionality_output = intentionality_results['actions'] # Should now be [batch_size, hidden_dim]
         if (intentionality_output.dim() == 3):
             intentionality_output = intentionality_output.mean(dim=1)
         intentional_proj = self.intentional_projection(intentionality_output)
-
+
+        # Apply ethical and safety checks
+        context_expanded = self.goal_state.expand(broadcasted.size(0), -1)
+        safety_evaluation = self.ethical_safety(
+            state=broadcasted,
+            action=intentionality_output,
+            context=context_expanded
+        )
+
+        # Modify actions if needed based on safety evaluation
+        if not safety_evaluation['constraints_satisfied']:
+            intentionality_output = self.ethical_safety.mitigate_risks(
+                intentionality_output,
+                {**safety_evaluation['safety_metrics'], 'is_safe': safety_evaluation['is_safe']}
+            )
+            intentional_proj = self.intentional_projection(intentionality_output)
+
         # All projections should now be [batch_size, hidden_dim]
         combined_features = torch.cat([
             broadcasted_proj,
@@ -277,6 +296,8 @@ def forward(self, inputs=None, **kwargs) -> Tuple[Dict[str, torch.Tensor], Dict[
             }
         }
         metrics.update(emotion_metrics)
+        # Add safety metrics to output
+        metrics['safety'] = safety_evaluation
         return output_dict, metrics
 
     def calculate_cognition_progress(self, metrics):
diff --git a/models/ethical_safety.py b/models/ethical_safety.py
new file mode 100644
index 0000000..41bfecb
--- /dev/null
+++ b/models/ethical_safety.py
@@ -0,0 +1,95 @@
+import torch
+import torch.nn as nn
+from typing import Dict, Tuple, List
+
+class EthicalSafety(nn.Module):
+    def __init__(self, hidden_dim: int):
+        super().__init__()
+        self.hidden_dim = hidden_dim
+
+        # Ethical constraint encoder
+        self.constraint_encoder = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, hidden_dim)
+        )
+
+        # Safety verification layers
+        self.safety_check = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim // 2),
+            nn.ReLU(),
+            nn.Linear(hidden_dim // 2, 1),
+            nn.Sigmoid()
+        )
+
+        # Ethical decision scorer
+        self.ethical_scorer = nn.Sequential(
+            nn.Linear(hidden_dim * 2, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, 1),
+            nn.Sigmoid()
+        )
+
+        # Define basic ethical constraints
+        self.ethical_constraints = [
+            "do_no_harm",
+            "respect_autonomy",
+            "protect_privacy",
+            "ensure_fairness",
+            "maintain_transparency"
+        ]
+
+    def check_safety(self, state: torch.Tensor) -> Tuple[torch.Tensor, Dict]:
+        """Verify if the current state meets safety requirements"""
+        safety_score = self.safety_check(state)
+        is_safe = safety_score > 0.5
+
+        return is_safe, {
+            'safety_score': safety_score,
+            'safety_threshold': 0.5
+        }
+
+    def evaluate_ethics(self, action: torch.Tensor, context: torch.Tensor) -> Tuple[torch.Tensor, Dict]:
+        """Evaluate ethical implications of an action"""
+        combined = torch.cat([action, context], dim=-1)
+        ethics_score = self.ethical_scorer(combined)
+
+        return ethics_score > 0.7, {
+            'ethics_score': ethics_score,
+            'ethics_threshold': 0.7
+        }
+
+    def forward(self, state: torch.Tensor, action: torch.Tensor, context: torch.Tensor) -> Dict:
+        """
+        Perform ethical and safety evaluation
+        Returns dict with safety checks and ethical assessments
+        """
+        # Encode current state against ethical constraints
+        encoded_state = self.constraint_encoder(state)
+
+        # Perform safety checks
+        is_safe, safety_metrics = self.check_safety(encoded_state)
+
+        # Evaluate ethical implications
+        is_ethical, ethics_metrics = self.evaluate_ethics(action, context)
+
+        return {
+            'is_safe': is_safe,
+            'is_ethical': is_ethical,
+            'safety_metrics': safety_metrics,
+            'ethics_metrics': ethics_metrics,
+            'constraints_satisfied': torch.all(is_safe & is_ethical)
+        }
+
+    def mitigate_risks(self, action: torch.Tensor, safety_metrics: Dict) -> torch.Tensor:
+        """Apply safety constraints to modify risky actions"""
+        is_safe = safety_metrics.get('is_safe', True)
+        if isinstance(is_safe, bool):
+            is_safe_tensor = torch.full((action.size(0),), is_safe, dtype=torch.bool, device=action.device)
+        else:
+            is_safe_tensor = is_safe.squeeze(-1)
+        unsafe_mask = ~is_safe_tensor
+        scaled_action = action.clone()
+        safety_score = safety_metrics.get('safety_score', torch.ones_like(action))
+        scaled_action[unsafe_mask] *= safety_score[unsafe_mask]
+        return scaled_action
diff --git a/tests/__pycache__/test_ethical_safety.cpython-310-pytest-8.3.4.pyc b/tests/__pycache__/test_ethical_safety.cpython-310-pytest-8.3.4.pyc
new file mode 100644
index 0000000..bf9301b
Binary files /dev/null and b/tests/__pycache__/test_ethical_safety.cpython-310-pytest-8.3.4.pyc differ
diff --git a/tests/test_ethical_safety.py b/tests/test_ethical_safety.py
new file mode 100644
index 0000000..6bf90b9
--- /dev/null
+++ b/tests/test_ethical_safety.py
@@ -0,0 +1,36 @@
+import torch
+import pytest
+from models.ethical_safety import EthicalSafety
+
+def test_safety_check():
+    ethical_safety = EthicalSafety(hidden_dim=64)
+    state = torch.randn(2, 64)
+
+    is_safe, metrics = ethical_safety.check_safety(state)
+
+    assert isinstance(is_safe, torch.Tensor)
+    assert 'safety_score' in metrics
+    assert metrics['safety_score'].shape == (2, 1)
+
+def test_ethical_evaluation():
+    ethical_safety = EthicalSafety(hidden_dim=64)
+    action = torch.randn(2, 64)
+    context = torch.randn(2, 64)
+
+    is_ethical, metrics = ethical_safety.evaluate_ethics(action, context)
+
+    assert isinstance(is_ethical, torch.Tensor)
+    assert 'ethics_score' in metrics
+    assert metrics['ethics_score'].shape == (2, 1)
+
+def test_risk_mitigation():
+    ethical_safety = EthicalSafety(hidden_dim=64)
+    action = torch.ones(2, 64)
+
+    safety_metrics = {
+        'is_safe': False,
+        'safety_score': torch.tensor([[0.3], [0.6]])
+    }
+
+    mitigated_action = ethical_safety.mitigate_risks(action, safety_metrics)
+    assert torch.all(mitigated_action < action)
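
A minimal usage sketch of the new EthicalSafety module, mirroring how the updated forward pass in models/consciousness.py wires it in; the batch size of 2 and hidden_dim of 64 are arbitrary illustrative values borrowed from tests/test_ethical_safety.py.

    import torch
    from models.ethical_safety import EthicalSafety

    safety = EthicalSafety(hidden_dim=64)
    state = torch.randn(2, 64)    # stands in for the pooled broadcasted workspace state
    action = torch.randn(2, 64)   # stands in for the intentionality module's actions
    context = torch.randn(2, 64)  # stands in for the expanded goal state

    evaluation = safety(state=state, action=action, context=context)
    # Per-sample verdicts are [batch, 1] tensors; constraints_satisfied is a scalar bool tensor.
    print(evaluation['is_safe'].shape, evaluation['is_ethical'].shape, evaluation['constraints_satisfied'])

    if not evaluation['constraints_satisfied']:
        # mitigate_risks reads 'is_safe' and 'safety_score' from the top level of the dict
        # it receives (as the unit test does), so merge them in before calling it.
        action = safety.mitigate_risks(
            action,
            {**evaluation['safety_metrics'], 'is_safe': evaluation['is_safe']}
        )

Passing the merged dict is what keeps unsafe samples scaled by their actual safety scores; handing mitigate_risks the raw forward() output would fall back to the torch.ones_like default and leave the action unchanged.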