-
Notifications
You must be signed in to change notification settings - Fork 0
/
dynamics_br.py
364 lines (316 loc) · 12.7 KB
/
dynamics_br.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
import numpy as np
from stockfish import Stockfish
import io
#examples of L_starts (Location matrix)
#L = np.array([[1,0,7],[3,3,3]])
#L = np.array([[1,0,6],[3,3,3]])
#L = np.array([[1,2,4],[6,4,0]])
# L = np.array([[2,3,5],[6,4,2]])
#For different location matrices change states() accordingly
#Selection matrix
# Selection vectors: one-hot rows that pick a piece's column of the 2x3
# location matrix (column 0 = pawn, 1 = white king, 2 = black king).
# They are exactly the rows of the 3x3 integer identity matrix.
S = dict(zip(("P", "Kw", "Kb"), np.eye(3, dtype=int)))
# Pawn displacements as [row step, column step]: single and double advance.
p_moves = {
    name: np.array(step)
    for name, step in (
        ("pfwd", (1, 0)),
        ("ppfwd", (2, 0)),
    )
}

# White-king displacements as [row step, column step], one per direction.
k_moves = {
    name: np.array(step)
    for name, step in (
        ("kfwd", (1, 0)),
        ("kbwd", (-1, 0)),
        ("kright", (0, 1)),
        ("kleft", (0, -1)),
        ("kfr", (1, 1)),
        ("kfl", (1, -1)),
        ("kbr", (-1, 1)),
        ("kbl", (-1, -1)),
    )
}
# Black-king displacements as [row step, column step], one per direction.
# NOTE(review): the earlier remark here said some actions were commented out
# to reduce the number of reachable states (the agent would learn to avoid
# them anyway), but all eight directions are currently enabled and the table
# has the same entries as k_moves - confirm whether a restriction is intended.
kb_moves = {
    name: np.array(step)
    for name, step in (
        ("kfwd", (1, 0)),
        ("kbwd", (-1, 0)),
        ("kright", (0, 1)),
        ("kleft", (0, -1)),
        ("kfr", (1, 1)),
        ("kfl", (1, -1)),
        ("kbr", (-1, 1)),
        ("kbl", (-1, -1)),
    )
}
#stoc = Stockfish(path="C:/Users/pc/Desktop/UNIMI- DSE/First year/Reinforcement Learning/Project/stockfish_15.1_win_x64_popcnt/stockfish-windows-2022-x86-64-modern")
#stoc = Stockfish(path="C:/Users/Guido/Desktop/RL/Project/stockfish_15.1_win_x64_popcnt/stockfish-windows-2022-x86-64-modern")
#---------------------------------------------
#-----------DYNAMICS FUNCTIONS----------------
#---Regulating a single agent (white player)--
#---------------------------------------------
def init_game(L, queen=False):
    # Inspired by: https://stackoverflow.com/questions/56754543/generate-chess-board-diagram-from-an-array-of-positions-in-python
    '''
    Encode the current position as the piece-placement field of a FEN string.
    - Input: Location matrix (row 0 = board rows, row 1 = board columns;
      column 0 = pawn, 1 = white king, 2 = black king).
      If queen = True the pawn square is written as a white queen instead.
    - Output: Piece-placement string, highest board row first,
      e.g. '3k4/8/8/8/8/8/3P4/3K4'. No side-to-move or other FEN fields.
    '''
    board = [[None] * 8 for _ in range(8)]
    # Placement order matters if squares coincide: later pieces overwrite.
    board[L[0][0]][L[1][0]] = "Q" if queen else "P"
    board[L[0][1]][L[1][1]] = "K"
    board[L[0][2]][L[1][2]] = "k"

    ranks = []
    for rank in reversed(board):  # FEN lists the top row of the board first
        tokens = []
        gap = 0  # length of the current run of empty squares
        for square in rank:
            if square is None:
                gap += 1
                continue
            if gap:
                tokens.append(str(gap))
                gap = 0
            tokens.append(square)
        if gap:
            tokens.append(str(gap))
        ranks.append("".join(tokens))
    return "/".join(ranks)
#---------------------------------------------
def legal_move(L, limit=True):
    '''
    Retrieve the possible moves from the current state.
    - Input: Location matrix (numpy array; column 0 = pawn, 1 = white king,
      2 = black king). If limit = False the white king can move without
      restrictions on any column.
    - Output: Set of possible moves (keys of p_moves / k_moves) for the white
      player from the current state.
    '''
    allowed = set()

    # ---- Pawn ----
    pawn = L[:, 0]
    king_offsets = (L[:, 1] - pawn, L[:, 2] - pawn)

    def king_ahead_by(rows):
        # True when either king stands `rows` squares straight ahead of the pawn.
        target = np.array([rows, 0])
        return any(np.array_equiv(offset, target) for offset in king_offsets)

    blocked_one = king_ahead_by(1)
    if L[0][0] == 1 and not blocked_one and not king_ahead_by(2):
        # Pawn on its starting row with both squares ahead free of kings.
        allowed.add("pfwd")
        allowed.add("ppfwd")
    elif not L[0][0] == 7 and not blocked_one:
        allowed.add("pfwd")

    # ---- White king ----
    px, py = L[0][0], L[1][0]
    kwx, kwy = L[0][1], L[1][1]
    kbx, kby = L[0][2], L[1][2]
    # Equality test (not truthiness) kept so only True/1 enables the restriction.
    restrict_columns = limit == True

    for name, step in k_moves.items():
        row = kwx + step[0]
        col = kwy + step[1]
        if not (0 <= row < 8 and 0 <= col < 8):
            continue  # off the board
        if restrict_columns and col in (0, 1, 2):
            continue  # columns a, b, c are not allowed for the white king
        if row == px and col == py:
            continue  # square occupied by the pawn
        if abs(row - kbx) <= 1 and abs(col - kby) <= 1:
            continue  # black king's square or a square next to it
        allowed.add(name)
    return allowed
#---------------------------------------------
def legal_state(L):
    '''
    Retrieve the possible future states from the current one.
    - Input: Location matrix.
    - Output: List of location matrices, one per move returned by
      legal_move(L), in the iteration order of that set.
    '''
    successors = []
    for action in legal_move(L):
        if action in ("pfwd", "ppfwd"):
            step, selector = p_moves[action], S["P"]
        else:
            step, selector = k_moves[action], S["Kw"]
        # (2x1 step) @ (1x3 selector) adds the step to the moved piece's column only;
        # atleast_2d is needed to perform the matrix multiplication.
        successors.append(L + np.atleast_2d(step).T @ np.atleast_2d(selector))
    return successors
#---------------------------------------------
def move(L, action):
    '''
    Update the current state to reach the future one thanks to a specific white move.
    - Input: Location matrix and white move in our notation (a key of p_moves
      or k_moves; any name that is not a pawn move is looked up in k_moves,
      so an unknown name raises KeyError).
    - Output: Future state, move in Stockfish notation and piece moved
      ("P" for the pawn, "Kw" for the white king).
    '''
    if action in ("pfwd", "ppfwd"):
        piece = "P"  # white moved pawn
        step = p_moves[action]
    else:
        piece = "Kw"  # white moved king
        step = k_moves[action]
    # atleast_2d is necessary to perform the matrix multiplication:
    # (2x1 step) @ (1x3 selector) touches only the moved piece's column.
    L_prime = L + np.atleast_2d(step).T @ np.atleast_2d(S[piece])
    choosen_move = ita_stock(L, L_prime, piece)
    return L_prime, choosen_move, piece
#---------------------------------------------
#
#Not useful in the final version
#
#def reward(L, L_prime):
# '''
# Retrieve the reward associated to the white move.
# - Input: Current location matrix, future state (after white move)
# - Output: Reward (float) associated to the move.
# '''
# r = 0
# if np.array_equiv(L[:,0], L_prime[:,0]): #white player moved the king
# piece = "K"
# if abs(7 - L[0][0]) < abs(L[1][2] - L[1][0]):
# r = 0
# elif abs(7 - L[0][0]) >= abs(L[1][2] - L[1][0]):
# if L_prime[1][1] - L[1][0] < 2 : ##king in the column next to the pawn
# r = 5
# else:
# r = 2
# else: #white player moved the pawn
# piece = "P"
# if L_prime[0][0] == 7:
# r = 200
# else:
# if L_prime[0][0] == L[0][1]: #pawn on the same row as the king
# r = -5
# if abs(7 - L[0][0]) >= abs(L[1][2] - L[1][0]):
# r = r+0
# if dist(L_prime[:,1], L_prime[:,0]) <= np.sqrt(2):
# r = r+3
# else:
# r = r+1
# elif abs(7 - L[0][0]) < abs(L[1][2] - L[1][0]):
# r = r+15
# return piece, r
#
#---------------------------------------------
#
#Not useful in the final version
#
#def successors(L):
# '''
# For all the possible states from the current one, yields a list of possible moves with the associated reward.
# - Input: Current location matrix.
# - Output: [[piece1, move1, r1], [piece2, move2, reward2], ...], where move is in stockfish notation.
# '''
# data = []
# legal_moves = legal_move(L)
# for a in legal_moves:
# L_prime = move(L, a)[0]
# piece = move(L,a)[2]
# s_prime = ita_stock(L, L_prime, piece) #return the action in stockfish notation (Ex:'d2d3')
# r = reward(L, L_prime)[1]
# data.append([piece, s_prime, r])
#
# return data
#
#---------------------------------------------
def black_move(L, black_action):
    '''
    Update the current state to reach the future one thanks to a specific black move.
    - Input: Location matrix and black move in our notation (a key of
      kb_moves; an unknown name raises KeyError).
    - Output: Future state (a new location matrix).
    '''
    step = np.atleast_2d(kb_moves[black_action]).T  # 2x1 displacement
    selector = np.atleast_2d(S["Kb"])               # 1x3, picks the black-king column
    return L + step @ selector
#---------------------------------------------
#---------COMMUNICATION FUNCTIONS-------------
#---------------------------------------------
def stock_ita(s):
    '''
    Translate the black move from stockfish notation to our notation.
    - Input: four-character string in the form 'd2d3'
      (from-file, from-rank, to-file, to-rank).
    - Output: the matching key of kb_moves, e.g. 'kfwd'.
      Raises IndexError when the displacement matches no entry of kb_moves.
    '''
    file_index = {letter: idx for idx, letter in enumerate("abcdefgh")}
    from_file, from_rank, to_file, to_rank = s  # d 2 d 3

    # Only differences are used, so the 1-8 rank numbering needs no rescaling
    # to our 0-7 indexes.
    col_step = file_index[to_file] - file_index[from_file]  # movement along y axis
    row_step = int(to_rank) - int(from_rank)                # movement along x axis
    displacement = np.array([row_step, col_step])

    matches = [
        name
        for name, step in kb_moves.items()
        if (step == displacement).all()
    ]
    return matches[0]
#---------------------------------------------
def ita_stock(L, L_prime, piece):
    '''
    Retrieve the move in the stockfish notation (e.g. 'd2d3').
    - Input: Current location matrix, future location matrix, moved piece
      ("P" selects the pawn column; any other value selects the white king column).
    - Output: string in stockfish notation (e.g. 'd2d3').
      Raises KeyError if a column index is not one of 0..7.
    '''
    itastock = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e", 5: "f", 6: "g", 7: "h"}
    column = 0 if piece == "P" else 1

    def square(board):
        # File letter followed by the 1-based rank of the moved piece.
        file_letter = itastock[board[1][column]]
        # Add 1 to the row to match the 1-8 range. int() keeps the rank free of
        # a decimal part when the matrix holds floats ('2', not '2.0').
        rank = int(board[0][column]) + 1
        return file_letter + str(rank)

    return square(L) + square(L_prime)
#---------------------------------------------
def array_to_int(L, small=False):
    '''
    Transform an array into an integer by row (e.g: array([[1,0,7],[3,3,3]]) --> 107333).
    - Input: Location matrix in array form. With small = True only the first
      two columns are encoded, giving four digits
      (e.g: array([[1,0,7],[3,3,3]]) --> 1033).
    - Output: An integer representing the location matrix.
    '''
    # Equality test (not truthiness) kept so only True/1 selects the short form.
    if small == True:
        weighted = (
            (1e3, L[0][0]),
            (1e2, L[0][1]),
            (1e1, L[1][0]),
            (1e0, L[1][1]),
        )
    else:
        weighted = (
            (1e5, L[0][0]),
            (1e4, L[0][1]),
            (1e3, L[0][2]),
            (1e2, L[1][0]),
            (1e1, L[1][1]),
            (1e0, L[1][2]),
        )
    total = 0
    for weight, digit in weighted:
        total = total + weight * digit
    return int(total)
#---------------------------------------------
def int_to_array(number, small=False):
    '''
    Transform an integer into an array (i.e: 107333 --> array([[1,0,7],[3,3,3]])).
    - Input: an integer whose decimal digits are the matrix entries by row.
      With small = True four digits are read into a 2x2 array, otherwise six
      digits into a 2x3 array.
    - Output: integer array representing the Location matrix.
      Raises ValueError if the decimal form contains a non-digit (e.g. a sign).
    '''
    shape = (2, 2) if small else (2, 3)
    width = shape[0] * shape[1]
    # Zero-pad on the left: a code whose leading entry is 0 (e.g. 17333 for
    # [[0,1,7],[3,3,3]]) prints with fewer digits than the matrix has cells.
    digits = [int(ch) for ch in str(number).zfill(width)]
    # Only the first `width` digits are used, filled row by row.
    return np.array(digits[:width], dtype='int').reshape(shape)
#---------------------------------------------