From e9e9d9989e7540265d77a7270f95f1cca14a89d6 Mon Sep 17 00:00:00 2001 From: kasinadhsarma Date: Fri, 27 Dec 2024 11:36:26 +0530 Subject: [PATCH] Add EthicalSafety model and corresponding tests for safety and ethics evaluations --- .../__pycache__/consciousness.cpython-310.pyc | Bin 7566 -> 7864 bytes .../ethical_safety.cpython-310.pyc | Bin 0 -> 2777 bytes models/consciousness.py | 45 ++++++--- models/ethical_safety.py | 95 ++++++++++++++++++ ...thical_safety.cpython-310-pytest-8.3.4.pyc | Bin 0 -> 3239 bytes tests/test_ethical_safety.py | 36 +++++++ 6 files changed, 164 insertions(+), 12 deletions(-) create mode 100644 models/__pycache__/ethical_safety.cpython-310.pyc create mode 100644 models/ethical_safety.py create mode 100644 tests/__pycache__/test_ethical_safety.cpython-310-pytest-8.3.4.pyc create mode 100644 tests/test_ethical_safety.py diff --git a/models/__pycache__/consciousness.cpython-310.pyc b/models/__pycache__/consciousness.cpython-310.pyc index 7c9e36e5349557b1e2f94d872af1a3f7e4e3fb19..0cec44a3b184b4c04792ac64e7a1caf7fd6ace25 100644 GIT binary patch delta 2733 zcmZ8jUu+yl8Qs%8Y@AyB)NKT=Z=vOswNq4L06#RG5KMTAhc64gd+V%9>3?&zoc4PCOf2_1gTzE9Pm1M)3x=D!2Xw3Oh1bNUF-aL8l{2E6y(n1LIw6g&!XWsx0{_?-_Th`rigR5rvfW_N z@w=DByV=XrC6IPI^7rqRf1Q%Qzpu12I4QBj`P}gb&;-OnPiTiN)?DT}k!Mq{wYQ8XMyhwT&~%^#7ok9T)>*)z~;o9qHgA3{K% z*aARYVqV02?*-3J4h?%4hZeFV#{8X_4Y&=3=fiGMW4_02A=+tN`;|$Mi8QTiYZm8@$&#Y&uPIEU1 z$d#=e&uo*i0j)fj+S}wC>Nq>jfyBHk8Rwx_h|=rDM8dOOmFB^Q3S5($`_^CP=Du~4 zE0e7fH$kNU^zt56S+8uX<03Co6I?6t(mwf3UfL&@fKlZo;FdWswx(bS7A)aBME{UWV5mBbmCvLPp)21)a#Mq4*?<2tWz&G6hdAqubZI-OZkc#YRPD%81I zac*jn%!|>~swGoXi*tVH55mY9xE`Fc_ot?34xQ=T67}844f?~g@Bh$RDKz1n$ny*p zabBFW?tM<4a6H`dT*n#5jeN*hMnKfopn^q^6S1C%;|yeTZhHB4(rN`DZAP_`mJ z?D!sy(=vJ5I*ZU^D1Krc)vsc6B>rSAi(P9~{|v+~2BfhDr8CY!@ee|~=Wlp%Dj0ZStS9B7-GE~Bt@`oj zFQOT|apLmjfNg|B$Mx=(Jlu`A{5%UB>N;WMQG3XOb@p8pm*9HAEr*f;wZjeBYiQ_I z94hV&IX$T_-m9NH9hYIty#X@z#InR@fNs%oYAvywo`QkfE8xBso&midssuNNU#v3HA+;W6RP~M3sN~M@B}Mt<=Z4l=yRF zIjtJxYD<)@)=qPJnUL>^r(QXVv+rZ~%i{Is^h5>`qZX$yv@+oA2RQx) zLLW!conGKXaoX)U{b8)b(!+qYGf4^kIja2v0qwJ2BB1@)fc@<DD?aiMLuylV2krZ&~&z8Zq(g`!2-gQb5@`lz$E3>j=_!^pHJ=AP*b91DFaB zYmgi4ZS3B{Zo#&}|1fmDUeC4<`1A;>;04W!igHQ;P{L^dLrIZE&G6K0ju@tqDyQnk ziG}h2)s#C5%)uT*R$06;bA;I9H#39JNI?mus{rs0KwY>i)6FKEmV!h@3neKc{4m4Zu`|2g z8FOcxWa${Ukg8NFpy-N+v=Mmw(ubh*VWo-(5akI~s#H~dLBk&aQJYrf2?^2VoIBo4 zySC=@x#xW6+;i_e_l)0NdVSFfOC>{s&-KNi^TpB2)-3tstk4w$T%b&lu`2l&CkkRHv z7T3JBkgc~O|MZKVaAGRNx*=m+BI4cvpPe}hI7cURx=}WrekbBL3m4`r(01D5^RJVi9u=RzpKRv0C#>-|jRW^% zlqN~F%yW0py30H#_FB|yJN@o<{Dj{$u8{?Pr8s{-rjpv=9l3tkk6K|r?)Br{aZ0Y! zb^Bcm)jQ|^ivPR#*8TJVsw#5HnP8FAtCg2qRn>j;X z`{Ol|J~Z!Vm|??X|l&qdkg_{Vg~@yGV@~Qd*{7YRx0c}XjQEZ# ziAME?v_YPh2l;`XNTb5GW>A2U5$9HlnI_41WLlucYw`ve7+^EUdTFJ!ArFd4kro@$ zV5}XKlG4c9fcv{Dvq4f?DN_x63FxJfos=MkHB5{h%Ik@-L&4yc!~jY;F?SemC*}^r z1V%M6fnkdTt)!gT9VJU|Pyy>iQUPlxN3 z4YDC829Pys15u+DM6g$dy^3~_CbKnxwZCZ?C#E12z93vp}_v#O52`UF%n|!=@5yf?c7df3gI6Mzx zv%yy2Csi?%ROyAiH0Q_OS|oagSP46XE_)FUBlrkM5L|@g2zWWOt0QjBJvh7<;V8nl z08$0IRcb(o4Wm}qU-i;SH4LCjKY>OqyQIxJw1AUScPu`MZla^x7eclg^&HpRZZt!5 zC_~T2whK2BuBIEYm(kFta1A7Sow*Q>ZF5&P&{?5$wQ1naO&{V}U z5huldf%aDsR?(7ccS9#mb8gpJ>!lj328%AA^`a|C`z->-XIBw0eyYQ6beI=KyGwHR zR@-*@wLKLw!_VwlAr}ABp5tfULjO60?_m%V%02d7IDvB^We=nNM+m18MDCa&dkR4m z4ZcMgzE)EuiW&P24u8iVo1J)g8ASmnP{M6z*Z}c~@K)%ON)9Rd^K!u;x~Z$SI;ns2 zp#6+s-H;$C`#w6A`Ssa3@(lmWY;c!Q;5(6xU!oWd-ZEHM>>Y&V9XQ21bNkwhaBU*= zkQJG1f3+{!bx?uhHKb$I=1g|S^*Chixx0fvypUk`*L%?K#S)oV>}}|gL)eF)A?)Wj z=jO=)Uf#R^r+ZORL=aVAf|P2HgcSb2h?TM5qYbD3pIn!5C@NDw4*!XOdjj&7qHh4` bl1XeV&HwvWWoal47OtwkxTj-#W``sw;i47S-k;r2GY7FXxooGxms*`nWEqPM!L`Q|GuD3}OP!d|XfX zfRLYXu-$wZd<38V0E8fdmSnvCr4<`9ihWjkl|S~YU>sE8IHcqsM0mn~PJ}PnN$;6A zjs$&3M!^fPK|URM*8il)j13;mXQj0M{X&}zm$qBf4fMaDBgvQwGG>BGFK6G9oC+qq z=l!t{M+7IK2xJd`Wt6j9B=;L|^V zFyt9o(Q`scOIyO3@m4;30gUK5j>2e|K+~2mp0=|6MDp~uVtCLqkp7!kMpcdwdVXs^2=w()25Qgr>QRLR7^DhkRHpY z;B!%dbH`aT0l$l;zFWR`cWb@^uS$KqId1LL(QKji;eL2W{YMZn4XN*;gzrCoZwNj7 z!r%M|e{m)JZ5SsZ6rD0-j9b4oNC0jZW4#NfeM%D2W*82^fd=?+*dV z4g3yod(e?#rVumJrW7NpZi4E_SGV9ty^bOQVS9z!A%RGbAiwi3=BDe7ud3iL*+XL2$2kqy#)vCrzHDb+UHh#s8pV-gLen|>Mam9%9`qHcp*YzP+f&^Uv^IN zwedfJjzjT~L40jceC^(LE<3uiCqtYzZ4Ww_&s=2v9Xc101$k#hTmPJX#m<7W(4mL; zF&OvGqBdxIZP-SbnXvy6Ij4etA3*G}<&!w++UHW`jjAp%O>40UyUr~@_d@%D3w@mk zC@#s9!b~^1E)X;@ATBLw4cW*A?Ztk8^9{IzjUfUZK#*^SUEEgEs3Ox{8qs_+zOj`) zAT~{*^Fj)+bR8_IH&NV0fdOm1v@Az^TLFJb++dx)1a~jY`Js++BSxco7sZwL&;w+i zq)K36)`uYCaOfR)%)`gPXrG3^E^q8o+3j$Kcj6y8#8l*Xd-X0{gw?yMLR_%EbkB7YSY`rb!@~VpdXxK3#OrNjtC}DOaDE z4TM?DAP)-pxX7e>8!qGW$|$zpL>eTwfSQrk8T|A3x@pQaHGV!XOUVAkQmVhe9^2DU zbL3LmAS;_%+C31Om&K_ZMO&}VapaYaiZbLSq)vh#u1G)Thf=cWK?-I2vLNtFamy#7X?dO}7Hk?_NUWGTBjGs{_!rA#hgIll8&Q1 Tuple[Dict[str, torch.Tensor], Dict[ workspace_output = self.global_workspace(remaining_inputs) + # Project broadcasted state first + broadcasted = workspace_output['broadcasted'] + if (broadcasted.dim() == 3): + broadcasted = broadcasted.mean(dim=1) # [batch_size, hidden_dim] + broadcasted_proj = self.broadcasted_projection(broadcasted) + # Get emotional state and ensure proper shape emotional_state, emotion_metrics = self.emotional_processor(workspace_output['broadcasted']) # Process memory retrieval retrieved_memory = self.memory_retrieval(workspace_output['broadcasted']) + # Calculate emotional influence - should match broadcasted shape emotional_influence = self.emotion_integration( torch.cat([workspace_output['broadcasted'], emotional_state], dim=-1) ) - # Process intentionality - intentionality_results = self.intentionality_module(workspace_output['broadcasted'], self.goal_state) - intentionality_output = intentionality_results['actions'] # Should now be [batch_size, hidden_dim] - - # Project each component to same dimension, ensuring proper shapes - broadcasted = workspace_output['broadcasted'] - if (broadcasted.dim() == 3): - broadcasted = broadcasted.mean(dim=1) # [batch_size, hidden_dim] - broadcasted_proj = self.broadcasted_projection(broadcasted) - if (emotional_influence.dim() == 3): emotional_influence = emotional_influence.mean(dim=1) emotional_proj = self.emotional_projection(emotional_influence) - # Ensure intentionality output has correct shape + # Process intentionality + intentionality_results = self.intentionality_module(workspace_output['broadcasted'], self.goal_state) + intentionality_output = intentionality_results['actions'] # Should now be [batch_size, hidden_dim] if (intentionality_output.dim() == 3): intentionality_output = intentionality_output.mean(dim=1) intentional_proj = self.intentional_projection(intentionality_output) - + + # Apply ethical and safety checks + context_expanded = self.goal_state.expand(broadcasted.size(0), -1) + safety_evaluation = self.ethical_safety( + state=broadcasted, + action=intentionality_output, + context=context_expanded + ) + + # Modify actions if needed based on safety evaluation + if not safety_evaluation['constraints_satisfied']: + intentionality_output = self.ethical_safety.mitigate_risks( + intentionality_output, + safety_evaluation + ) + intentional_proj = self.intentional_projection(intentionality_output) + # All projections should now be [batch_size, hidden_dim] combined_features = torch.cat([ broadcasted_proj, @@ -277,6 +296,8 @@ def forward(self, inputs=None, **kwargs) -> Tuple[Dict[str, torch.Tensor], Dict[ } } metrics.update(emotion_metrics) + # Add safety metrics to output + metrics['safety'] = safety_evaluation return output_dict, metrics def calculate_cognition_progress(self, metrics): diff --git a/models/ethical_safety.py b/models/ethical_safety.py new file mode 100644 index 0000000..41bfecb --- /dev/null +++ b/models/ethical_safety.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +from typing import Dict, Tuple, List + +class EthicalSafety(nn.Module): + def __init__(self, hidden_dim: int): + super().__init__() + self.hidden_dim = hidden_dim + + # Ethical constraint encoder + self.constraint_encoder = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, hidden_dim) + ) + + # Safety verification layers + self.safety_check = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim // 2), + nn.ReLU(), + nn.Linear(hidden_dim // 2, 1), + nn.Sigmoid() + ) + + # Ethical decision scorer + self.ethical_scorer = nn.Sequential( + nn.Linear(hidden_dim * 2, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, 1), + nn.Sigmoid() + ) + + # Define basic ethical constraints + self.ethical_constraints = [ + "do_no_harm", + "respect_autonomy", + "protect_privacy", + "ensure_fairness", + "maintain_transparency" + ] + + def check_safety(self, state: torch.Tensor) -> Tuple[torch.Tensor, Dict]: + """Verify if the current state meets safety requirements""" + safety_score = self.safety_check(state) + is_safe = safety_score > 0.5 + + return is_safe, { + 'safety_score': safety_score, + 'safety_threshold': 0.5 + } + + def evaluate_ethics(self, action: torch.Tensor, context: torch.Tensor) -> Tuple[torch.Tensor, Dict]: + """Evaluate ethical implications of an action""" + combined = torch.cat([action, context], dim=-1) + ethics_score = self.ethical_scorer(combined) + + return ethics_score > 0.7, { + 'ethics_score': ethics_score, + 'ethics_threshold': 0.7 + } + + def forward(self, state: torch.Tensor, action: torch.Tensor, context: torch.Tensor) -> Dict: + """ + Perform ethical and safety evaluation + Returns dict with safety checks and ethical assessments + """ + # Encode current state against ethical constraints + encoded_state = self.constraint_encoder(state) + + # Perform safety checks + is_safe, safety_metrics = self.check_safety(encoded_state) + + # Evaluate ethical implications + is_ethical, ethics_metrics = self.evaluate_ethics(action, context) + + return { + 'is_safe': is_safe, + 'is_ethical': is_ethical, + 'safety_metrics': safety_metrics, + 'ethics_metrics': ethics_metrics, + 'constraints_satisfied': torch.all(is_safe & is_ethical) + } + + def mitigate_risks(self, action: torch.Tensor, safety_metrics: Dict) -> torch.Tensor: + """Apply safety constraints to modify risky actions""" + is_safe = safety_metrics.get('is_safe', True) + if isinstance(is_safe, bool): + is_safe_tensor = torch.full((action.size(0),), is_safe, dtype=torch.bool, device=action.device) + else: + is_safe_tensor = is_safe.squeeze(-1) + unsafe_mask = ~is_safe_tensor + scaled_action = action.clone() + safety_score = safety_metrics.get('safety_score', torch.ones_like(action)) + scaled_action[unsafe_mask] *= safety_score[unsafe_mask] + return scaled_action diff --git a/tests/__pycache__/test_ethical_safety.cpython-310-pytest-8.3.4.pyc b/tests/__pycache__/test_ethical_safety.cpython-310-pytest-8.3.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf9301b6d4aecd752006812ea475ca882fb5d9e7 GIT binary patch literal 3239 zcmZ`*&2QaQ7Weh@_w##6N?RyGLqIUpK)#yvBST7yhO%H$hUvf}LbA-ePV$=f{Gxl^ zPSdd^1T5H~*~~K1B%6`KAHtH&jF84Fq^|leWP$jdW9QXDV9CeF=N{j4&-MBJ&Ykpn zfr0CXjcofP%P^kMOZzhMavQh)D}*$p7#W*_pXRt@8p0S`o3^M&oK1&E+)WoxbL5Tv zO&?<&X?<;M2GW)eo?Yom56@7Fdq(6x#k|JdNL0N$`Oa{VjP4~{X}&)o^Xb)t;^F=? zZvDM5j2sJ*=C`6Wa&c%PfH*QsQ5c7vLL7DrhmEM7U(I}m~3rkoyN8&5-CAQHrN~^S`U%G``bRHU|S9tp0@af5*@V12Pl6}iO zvUnyJGlkRG-@xV<9mM{YN1-8hD0>GcV)?Q!X9|BPpa)IdpcU6etd8_A85H||zx3oR z_Q)&jU9;>KUHyMv#az+FD$bF;y$X)!weoxJphNk|^YTLBaLllY8Mb30Uk`T|IUD%- z7wS3nyBpalaJJhj8p;dN1QG=ahyXx%iFOygyafQSh ziEC9ze))c^2NRWIspc>{7LU(0(dUO5$6t+f<&M)_4F~$zR8+C_`RYl(mB>{%!FUx8 z4ji7Ml9Bw0dGjV_T%Z{-O`R%~CJm&xsCSaRG$`k9*4bl2jyoJfJ&!5WEX0{WH|jt) zq8UZIsR-kht&&V;RXEs52fLhozF#@@4k?0H72e+4k3WAf9Oc-k${9@nM_mQ5=Bdu( zL{%@udS~)rBx9BCsd#%d`8*lLSu##5pJya<)qjuw-EflKQEH;9=i;r28Yg+2KH3{4 zS(0Ot^mpa__ z_Q-PkKz!)P-7p4DK-A@ zcd4;OlV(kmATtw0*#SioQQ9ENPGN&6ox;)g&k*Gj5e*aN8Ug<|i1IdhFcsZlYN)BG zO_U$=97L3#@*HiV1VQpwpv8tL-G)3scvGUZK`%ZLWMP3Q{leG(!B<3-J`*MEQ;GsL zwTSYc6uST&tABAEXf2`)m?#4#t!@*y+luQUmc>Mg*dR(DMClZM;eaT^BGfEjNOb4`@0gCDKXp$(=E<&wsr1+B$RK{3u% z7eiIZCXgJlmO4k?=SdKJ)I}06khnzRMG`NO_ydWTNz9XY1)@zM>Qxx3ne<^YdH_Jy zVYPaVd}%G}brOFh@dk;@Bq9=Tl6Z^6+awksqP|+hqjHh~6M{RKWO@21R~zKP>s%&n zg~SySt0dM)TqSXh#B~xkNDw${@h522^JkFOEE%Yr!JsD85UxTody1_0U^;^=VjJ3J zK6K3fF0v%>bJ&sgHVy{cVjD*U=^o$!0+d;WAe8Y@#4au1OY9nGvKDYA7-wHlGndp{ zg#>;ILA4_Ts#iXu+`@RkxOyL2mo!vvAn6!Ud;Enx2v?u5M~$nl4EYe?%AV{`eYeOr zC*NK2B@MIn`8!~upkqy!4kvQvz(PCsayo4E3ZQ9Pum9d^ zlqa_$v!?Ai>|`WP#6~0@&o!NHeuG;lb;a2+D03S%CF8mr)GhkJtos{P>Ek=CM!J65 zCZRm*_I2k(+iy%bBrKxG1SKC8u16ZD0ES3pDPmSdWk3!2I*D}@q;p%bd7g>H6ch4XW0|0vX5*wJIX(>~9jtuDoNlV7ZN8Q(9e^zUkz zqq9#c01? M;Q^nX=>=i;7koxmZ2$lO literal 0 HcmV?d00001 diff --git a/tests/test_ethical_safety.py b/tests/test_ethical_safety.py new file mode 100644 index 0000000..6bf90b9 --- /dev/null +++ b/tests/test_ethical_safety.py @@ -0,0 +1,36 @@ +import torch +import pytest +from models.ethical_safety import EthicalSafety + +def test_safety_check(): + ethical_safety = EthicalSafety(hidden_dim=64) + state = torch.randn(2, 64) + + is_safe, metrics = ethical_safety.check_safety(state) + + assert isinstance(is_safe, torch.Tensor) + assert 'safety_score' in metrics + assert metrics['safety_score'].shape == (2, 1) + +def test_ethical_evaluation(): + ethical_safety = EthicalSafety(hidden_dim=64) + action = torch.randn(2, 64) + context = torch.randn(2, 64) + + is_ethical, metrics = ethical_safety.evaluate_ethics(action, context) + + assert isinstance(is_ethical, torch.Tensor) + assert 'ethics_score' in metrics + assert metrics['ethics_score'].shape == (2, 1) + +def test_risk_mitigation(): + ethical_safety = EthicalSafety(hidden_dim=64) + action = torch.ones(2, 64) + + safety_metrics = { + 'is_safe': False, + 'safety_score': torch.tensor([[0.3], [0.6]]) + } + + mitigated_action = ethical_safety.mitigate_risks(action, safety_metrics) + assert torch.all(mitigated_action < action)