-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.cpp
201 lines (173 loc) · 5.85 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#include "main.h"
// Module descriptor: module name "mls".
// NOTE(review): the remaining arguments are presumably attributes = 0 and
// version 1.1 (PSPSDK PSP_MODULE_INFO argument order) — confirm.
PSP_MODULE_INFO("mls", 0, 1, 1);
// NOTE(review): a negative value presumably means "all available memory minus
// 1024 KB" for the heap — confirm against the PSPSDK documentation.
PSP_HEAP_SIZE_KB(-1024);
// Main thread attributes: VFPU and user flags.
PSP_MAIN_THREAD_ATTR(PSP_THREAD_ATTR_VFPU | PSP_THREAD_ATTR_USER);
// make sure to align cached shared variables to 64
// (the attribute on `mem` aligns the pointer variable itself; the buffer it
// points to gets its own 64-byte alignment when main() allocates it)
// mem: shared counter buffer, accessed by both main() and meLoop().
volatile u32* mem __attribute__((aligned(64))) = nullptr;
// meStart: start flag, written by main() and polled by meLoop(), both through
// an address OR-ed with 0x40000000.
volatile bool meStart __attribute__((aligned(64))) = false;
// `mutex` expands to an access to address 0xbc100048 through vrg(), which is
// defined outside this file.
#define mutex vrg(0xbc100048) // non-cached kernel mutex
// kernel function to unlock the mutex
// Writes 0 to the mutex register, with a nop delay before the write and a
// sync plus another nop delay after it. Always returns 0.
// main() invokes it through kcall(); meLoop() calls it directly.
__attribute__((noinline, aligned(4)))
int unlock() {
// if acquired, briefly holds the lock with a pipeline delay,
// allowing cache operations to complete, could be useful
asm volatile("nop; nop; nop; nop; nop; nop; nop;");
// release: clear the mutex register
mutex = 0;
asm volatile("sync");
// provides opportunities for others with a pipeline delay
asm volatile("nop; nop; nop; nop; nop; nop; nop;");
return 0;
}
// kernel function that waits and attempts to lock and acquire the mutex
__attribute__((noinline, aligned(4)))
int lock() {
const u32 unique = getlocalUID();
do {
mutex = unique; // the main CPU can affect only bit[0] (0b01), while the Me can only affect bit[1] (0b10)
asm volatile("sync");
if (!(((mutex & 3) ^ unique))) { // see note
return 0; // lock acquired
}
// gives a breath with a pipeline delay (7 stages)
asm volatile("nop; nop; nop; nop; nop; nop; nop;");
} while (1);
return 1;
}
// kernel function to attempt locking and acquiring the mutex
__attribute__((noinline, aligned(4)))
int tryLock() {
const u32 unique = getlocalUID();
mutex = unique;
asm volatile("sync");
if (!(((mutex & 3) ^ unique))) { // see note
return 0; // lock acquired
}
asm volatile("sync"); // make sure to be sync before leaving kernel mode
return 1;
}
// note:
// it appears that the main CPU can read the mutex but only set bit[0],
// while the Me can read the mutex but only set bit[1]
//
// truth table of !((mutex & 3) ^ unique); a result of 1 means "lock acquired":
//
// mutex     unique
//   11  xor   01   => not 10 = 0
//   11  xor   10   => not 01 = 0
//   10  xor   01   => not 11 = 0
//   10  xor   10   => not 00 = 1
//   01  xor   01   => not 00 = 1
//   01  xor   10   => not 11 = 0
// Loop reached from meHandler() through the address stored in _meLoop.
// Waits for the start signal, then repeatedly, under the mutex, increments
// mem[0] and resets mem[1] to 0 once it exceeds 100, until _meExit becomes
// non-zero. Returns the value of _meExit.
// The meDCache* helpers and _meExit are defined outside this file.
__attribute__((noinline, aligned(4)))
static int meLoop() {
// read meStart using the uncached mask, wait until the signal is received
// from the main CPU and ensure that the shared mem is ready
do {
meDCacheWritebackInvalidAll();
} while(!vrg(0x40000000 | (u32)&meStart) || !mem);
do {
// invalidate cache, forcing next read to fetch from memory
meDCacheInvalidRange((u32)mem, sizeof(u32)*4);
// blocks until the mutex is acquired
lock();
mem[0]++;
// mem[1] is incremented by main(); wrap it back to 0 past 100
if (mem[1] > 100) {
mem[1] = 0;
}
unlock();
// write modified cache data back to memory
meDCacheWritebackRange((u32)mem, sizeof(u32)*4);
} while(!_meExit);
return _meExit;
}
// Bounds of the "_me_section" section.
// NOTE(review): presumably start/stop symbols generated by the linker, and
// presumably used by me_section_size (defined outside this file) — confirm.
extern char __start__me_section;
extern char __stop__me_section;
// Code placed in "_me_section"; initMe() copies that section to 0xbfc00040
// before resetting the Me. Performs the register setup below, then calls the
// function whose address is stored in _meLoop (set by initMe()).
__attribute__((section("_me_section"), noinline, aligned(4)))
void meHandler() {
vrg(0xbc100050) = 0x7f; // enable clocks: ME, AW bus RegA, RegB & Edram, DMACPlus, DMAC
vrg(0xbc100004) = 0xffffffff; // clear NMI
vrg(0xbc100040) = 1; // allow 32MB ram
asm volatile("sync");
// hand over to meLoop through its stored address
((FCall)_meLoop)();
}
// Setup routine invoked from main() through kcall().
// Copies me_section_size bytes starting at __start__me_section to 0xbfc00040,
// stores the address of meLoop (OR-ed with 0x80000000) in _meLoop, calls
// meDCacheWritebackInvalidAll(), then writes the reset sequence to 0xBC10004C.
// Always returns 0. me_section_size and _meLoop are defined outside this file.
static int initMe() {
memcpy((void *)0xbfc00040, (void*)&__start__me_section, me_section_size);
// Call meLoop, it is safer to invoke it with a kernel mask when using interrupts or spinlocks
_meLoop = 0x80000000 | (u32)&meLoop;
meDCacheWritebackInvalidAll();
// reset and start me
vrg(0xBC10004C) = 0b100;
asm volatile("sync");
vrg(0xBC10004C) = 0x0;
asm volatile("sync");
return 0;
}
// Demo helper used by the main loop to keep holding the mutex for a while.
// Returns false on 100 consecutive calls, then true on the following call,
// after which the cycle starts over.
bool releaseMutex() {
  static u32 remaining = 100;
  if (remaining == 0) {
    // countdown elapsed: re-arm it and tell the caller to release
    remaining = 100;
    return true;
  }
  --remaining;
  return false;
}
// Clears the debug screen, prints `str` at column 0, row 1, waits for the
// delay below and then exits the game.
// str: message to display; it is printed verbatim (never interpreted as a
//      printf format string).
void exitSample(const char* const str) {
  pspDebugScreenClear();
  pspDebugScreenSetXY(0, 1);
  // pass the message as an argument so that any '%' it contains is not
  // treated as a conversion specifier
  pspDebugScreenPrintf("%s", str);
  sceKernelDelayThread(1000000);
  sceKernelExitGame();
}
// Entry point of the sample.
// Loads the kcall PRX, runs initMe() through kcall(), allocates and zeroes
// the 64-byte aligned shared buffer `mem`, raises meStart, then loops until
// HOME is pressed: each iteration tries to take the mutex, updates mem[1] and
// mem[2] when it holds it, releases it whenever releaseMutex() says so, and
// prints the counters. Returns 0 on every path.
int main() {
  scePowerSetClockFrequency(333, 333, 166);
  pspDebugScreenInit();

  if (pspSdkLoadStartModule("ms0:/PSP/GAME/me/kcall.prx", PSP_MEMORY_PARTITION_KERNEL) < 0){
    exitSample("Can't load the PRX, exiting...");
    return 0;
  }

  // Init me before user mem initialisation
  kcall(&initMe);

  // to use DCWBInv Range, 64-byte alignment is required (not necessary while using DCWBInv All)
  mem = (u32*)memalign(64, (sizeof(u32) * 4 + 63) & ~63);
  if (!mem) {
    // allocation failed: do not touch the buffer; meLoop() keeps waiting
    // because meStart has not been raised
    exitSample("Can't allocate the shared memory, exiting...");
    return 0;
  }
  memset((void*)mem, 0, sizeof(u32) * 4);
  sceKernelDcacheWritebackAll();

  SceCtrlData ctl;
  u32 counter = 0;
  bool switchMessage = false;

  // start the process on the Me just before the main loop
  vrg(0x40000000 | (u32)&meStart) = true;

  do {
    // invalidate cache, forcing next read to fetch from memory
    sceKernelDcacheInvalidateRange((void*)mem, sizeof(u32) * 4);

    // functions that use spinlock, seem to need to be invoked with a kernel mask
    if(!kcall((FCall)(0x80000000 | (u32)&tryLock))) {
      switchMessage = false;
      if (mem[1] > 50) {
        switchMessage = true;
      }
      mem[2]++;
      mem[1]++;
      // sceKernelDelayThread(10000);
      // proof to visualize the release of the mutex and its effect on the counter (mem[0]) running on the Me
      if (releaseMutex()) {
        kcall((FCall)(0x80000000 | (u32)&unlock));
      }
    }

    // push cache to memory and invalidate it, refill cache during the next access
    sceKernelDcacheWritebackInvalidateRange((void*)mem, sizeof(u32) * 4);

    sceCtrlPeekBufferPositive(&ctl, 1);

    pspDebugScreenSetXY(0, 1);
    pspDebugScreenPrintf(" ");
    pspDebugScreenSetXY(0, 1);
    pspDebugScreenPrintf("Counters %u; %u; %u; %u", mem[0], mem[1], mem[2], counter++);
    pspDebugScreenSetXY(0, 2);
    if (switchMessage) {
      pspDebugScreenPrintf("Hello!");
    } else {
      pspDebugScreenPrintf("xxxxxx");
    }

    sceDisplayWaitVblankStart();
  } while(!(ctl.Buttons & PSP_CTRL_HOME));

  // exit me
  meExit();
  free((void*)mem);

  exitSample("Exiting...");
  return 0;
}