Skip to content

Commit

Permalink
ARM support.
Browse files Browse the repository at this point in the history
  • Loading branch information
mkskeller committed Apr 19, 2021
1 parent 6c89808 commit 0f656fa
Show file tree
Hide file tree
Showing 112 changed files with 1,735 additions and 474 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "Programs/Circuits"]
path = Programs/Circuits
url = https://github.com/mkskeller/bristol-fashion
[submodule "simde"]
path = simde
url = https://github.com/simd-everywhere/simde
3 changes: 1 addition & 2 deletions BMR/Key.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
#define COMMON_INC_KEY_H_

#include <iostream>
#include <emmintrin.h>
#include <smmintrin.h>
#include <string.h>

#include "Tools/FlexBuffer.h"
#include "Tools/intrinsics.h"
#include "Math/gf2nlong.h"

using namespace std;
Expand Down
2 changes: 1 addition & 1 deletion BMR/Party.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ void FakeProgramParty::receive_spdz_wires(ReceivedMsg& msg)
spdz_mac_key.unpack(spdz_wires[op].back());
if (!MC)
{
MC = new Passing_MAC_Check<Share<gf2n_long>>(spdz_mac_key);
MC = new MAC_Check_<Share<gf2n_long>>(spdz_mac_key);
cout << "MAC key: " << hex << spdz_mac_key << endl;
mac_key = spdz_mac_key;
}
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
The changelog explains changes pulled through from the private development repository. Bug fixes and small enhancements are committed between releases and not documented here.

## 0.2.4 (Apr 19, 2021)

- ARM support
- Base OTs optionally without SimpleOT/AVX
- Use OpenSSL instead of Crypto++ for elliptic curves
- Post-sacrifice binary computation with replicated secret sharing similar
to [Araki et al.](https://www.ieee-security.org/TC/SP2017/papers/96.pdf)
- More flexible multithreading

## 0.2.3 (Feb 23, 2021)

- Distributed key generation for homomorphic encryption with active security similar to [Rotaru et al.](https://eprint.iacr.org/2019/1300)
Expand Down
32 changes: 25 additions & 7 deletions CONFIG
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ ROOT = .
OPTIM= -O3
#PROF = -pg
#DEBUG = -DDEBUG
#MEMPROTECT = -DMEMPROTECT
GDEBUG = -g

# set this to your preferred local storage directory
Expand All @@ -12,8 +11,8 @@ PREP_DIR = '-DPREP_DIR="Player-Data/"'
# set for SHE preprocessing (SPDZ and Overdrive)
USE_NTL = 0

# set for using GF(2^128) online phase, OT, MASCOT, or BMR
# unset for GF(2^40) online and offline phase
# set for using GF(2^128)
# unset for GF(2^40)
USE_GF2N_LONG = 1

# set to -march=<architecture> for optimization
Expand All @@ -28,6 +27,24 @@ USE_GF2N_LONG = 1
ARCH = -mtune=native -msse4.1 -msse4.2 -maes -mpclmul -mavx -mavx2 -mbmi2 -madx
ARCH = -march=native

MACHINE := $(shell uname -m)
OS := $(shell uname -s)
ifeq ($(MACHINE), x86_64)
# set this to 0 to avoid using AVX for OT
ifeq ($(OS), Linux)
CHECK_AVX := $(shell grep -q avx /proc/cpuinfo; echo $$?)
ifeq ($(CHECK_AVX), 0)
AVX_OT = 1
else
AVX_OT = 0
endif
else
AVX_OT = 1
endif
else
AVX_OT = 0
endif

# allow to set compiler in CONFIG.mine
CXX = g++

Expand All @@ -38,6 +55,10 @@ ifeq ($(USE_GF2N_LONG),1)
GF2N_LONG = -DUSE_GF2N_LONG
endif

ifeq ($(AVX_OT), 0)
CFLAGS += -DNO_AVX_OT
endif

# MAX_MOD_SZ (for FHE) must be at least and GFP_MOD_SZ (for computation)
# must be exactly ceil(len(p)/len(word)) for the relevant prime p
# GFP_MOD_SZ only needs to be set for primes of bit length more than 256.
Expand All @@ -51,7 +72,6 @@ ifeq ($(USE_NTL),1)
LDLIBS := -lntl $(LDLIBS)
endif

OS := $(shell uname -s)
ifeq ($(OS), Linux)
LDLIBS += -lrt
endif
Expand All @@ -62,12 +82,10 @@ else
BOOST = -lboost_thread $(MY_BOOST)
endif

CFLAGS += $(ARCH) $(MY_CFLAGS) $(GDEBUG) -Wextra -Wall $(OPTIM) -I$(ROOT) -pthread $(PROF) $(DEBUG) $(MOD) $(MEMPROTECT) $(GF2N_LONG) $(PREP_DIR) $(SECURE) -std=c++11 -Werror
CFLAGS += $(ARCH) $(MY_CFLAGS) $(GDEBUG) -Wextra -Wall $(OPTIM) -I$(ROOT) -pthread $(PROF) $(DEBUG) $(MOD) $(GF2N_LONG) $(PREP_DIR) $(SECURE) -std=c++11 -Werror
CPPFLAGS = $(CFLAGS)
LD = $(CXX)

ECLIB = -lcryptopp

ifeq ($(OS), Darwin)
ifeq ($(USE_NTL),1)
CFLAGS += -Wno-error=unused-parameter
Expand Down
2 changes: 1 addition & 1 deletion Compiler/GC/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ class sbits(bits):
Instances can also be initialized from :py:obj:`~Compiler.types.regint`
and :py:obj:`~Compiler.types.sint`.
"""
max_length = 128
max_length = 64
reg_type = 'sb'
is_clear = False
clear_type = cbits
Expand Down
2 changes: 2 additions & 0 deletions Compiler/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ def TruncLeakyInRing(a, k, m, signed):
Returns a >> m.
Requires a < 2^k and leaks a % 2^m (needs to be constant or random).
"""
if k == m:
return 0
assert k > m
assert int(program.options.ring) >= k
from .types import sint, intbitint, cint, cgf2n
Expand Down
2 changes: 1 addition & 1 deletion Compiler/dijkstra.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def bubble_up(self, start):
childpos = MemValue(start * shift)
@for_range(self.levels - 1)
def f(i):
parentpos = childpos.right_shift(1, self.levels)
parentpos = childpos.right_shift(1, self.levels + 1)
parent, parent_state = self.heap.read_and_maybe_remove(parentpos)
child, child_state = self.heap.read_and_maybe_remove(childpos)
swap = parent > child
Expand Down
48 changes: 30 additions & 18 deletions Compiler/floatingpoint.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
from math import log, floor, ceil
from Compiler.instructions import *
from . import types
Expand Down Expand Up @@ -411,6 +412,8 @@ def TruncInRing(to_shift, l, pow2m):
return types.sint.bit_compose(reversed(bits))

def SplitInRing(a, l, m):
if l == 1:
return m.if_else(a, 0), m.if_else(0, a), 1
pow2m = Pow2(m, l, None)
upper = TruncInRing(a, l, pow2m)
lower = a - upper * pow2m
Expand Down Expand Up @@ -620,27 +623,36 @@ def BITLT(a, b, bit_length):
def BitDecFull(a):
from .library import get_program, do_while, if_, break_point
from .types import sint, regint, longint
p=int(get_program().options.prime)
p = get_program().prime
assert p
bit_length = p.bit_length()
bbits = [sint(size=a.size) for i in range(bit_length)]
tbits = [[sint(size=1) for i in range(bit_length)] for j in range(a.size)]
pbits = util.bit_decompose(p)
# Loop until we get some random integers less than p
done = [regint(0) for i in range(a.size)]
@do_while
def get_bits_loop():
logp = int(round(math.log(p, 2)))
if abs(p - 2 ** logp) / p < 2 ** -get_program().security:
# inspired by Rabbit (https://eprint.iacr.org/2021/119)
# no need for exact randomness generation
# if the prime modulus is close enough to a power of two
bbits = [sint.get_random_bit(size=a.size) for i in range(logp)]
if logp != bit_length:
bbits += [sint(0, size=a.size)]
else:
bbits = [sint(size=a.size) for i in range(bit_length)]
tbits = [[sint(size=1) for i in range(bit_length)] for j in range(a.size)]
pbits = util.bit_decompose(p)
# Loop until we get some random integers less than p
done = [regint(0) for i in range(a.size)]
@do_while
def get_bits_loop():
for j in range(a.size):
@if_(done[j] == 0)
def _():
for i in range(bit_length):
tbits[j][i].link(sint.get_random_bit())
c = regint(BITLT(tbits[j], pbits, bit_length).reveal())
done[j].link(c)
return (sum(done) != a.size)
for j in range(a.size):
@if_(done[j] == 0)
def _():
for i in range(bit_length):
tbits[j][i].link(sint.get_random_bit())
c = regint(BITLT(tbits[j], pbits, bit_length).reveal())
done[j].link(c)
return (sum(done) != a.size)
for j in range(a.size):
for i in range(bit_length):
movs(bbits[i][j], tbits[j][i])
for i in range(bit_length):
movs(bbits[i][j], tbits[j][i])
b = sint.bit_compose(bbits)
c = (a-b).reveal()
t = (p-c).bit_decompose(bit_length)
Expand Down
13 changes: 0 additions & 13 deletions Compiler/instructions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1577,19 +1577,6 @@ class writesocketc(base.IOInstruction):
def has_var_args(self):
return True

@base.vectorize
class writesockets(base.IOInstruction):
"""
Write a variable number of secret shares + MACs from registers into a socket
for a specified client id, message_type
"""
__slots__ = []
code = base.opcodes['WRITESOCKETS']
arg_format = tools.chain(['ci', 'int'], itertools.repeat('s'))

def has_var_args(self):
return True

@base.vectorize
class writesocketshare(base.IOInstruction):
""" Write a variable number of shares (without MACs) from secret
Expand Down
2 changes: 1 addition & 1 deletion Compiler/instructions_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,7 @@ class DirectMemoryWriteInstruction(DirectMemoryInstruction, \
WriteMemoryInstruction):
__slots__ = []
def __init__(self, *args, **kwargs):
if program.curr_tape.prevent_direct_memory_write:
if not program.curr_tape.singular:
raise CompilerError('Direct memory writing prevented in threads')
super(DirectMemoryWriteInstruction, self).__init__(*args, **kwargs)

Expand Down
6 changes: 4 additions & 2 deletions Compiler/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -1062,14 +1062,14 @@ def f(i, j):
"""
return for_range_multithread(n_threads, None, n_loops)

def multithread(n_threads, n_items, max_size=None):
def multithread(n_threads, n_items=None, max_size=None):
"""
Distribute the computation of :py:obj:`n_items` to
:py:obj:`n_threads` threads, but leave the in-thread repetition up
to the user.
:param n_threads: compile-time (int)
:param n_items: regint/cint/int
:param n_items: regint/cint/int (default: :py:obj:`n_threads`)
The following executes ``f(0, 8)``, ``f(8, 8)``, and
``f(16, 9)`` in three different threads:
Expand All @@ -1080,6 +1080,8 @@ def multithread(n_threads, n_items, max_size=None):
def f(base, size):
...
"""
if n_items is None:
n_items = n_threads
if max_size is None:
return map_reduce(n_threads, None, n_items, initializer=lambda: [],
reducer=None, looping=False)
Expand Down
3 changes: 3 additions & 0 deletions Compiler/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,9 @@ def _(i):
progress('f input')

def forward(self, batch=None):
if batch is None:
batch = regint.Array(self.N)
batch.assign(regint.inc(self.N))
self.compute_f_input(batch=batch)
if self.activation_layer:
self.activation_layer.forward(batch)
Expand Down
10 changes: 7 additions & 3 deletions Compiler/oram.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,11 @@ def get_slice(self):
for length,start in zip(self.lengths[:-1],series(self.lengths)):
res.append(remainder.mod2m(length, total_length - start, False))
remainder -= res[-1]
remainder /= floatingpoint.two_power(length)
if Program.prog.options.ring:
remainder = remainder.trunc_zeros(length,
total_length - start, False)
else:
remainder /= floatingpoint.two_power(length)
res.append(remainder)
return res
def set_slice(self, value):
Expand Down Expand Up @@ -1498,12 +1502,12 @@ def translate_index(self, index):
rem = mod2m(index, self.log_entries_per_block, log2(self.size), False)
c = mod2m(rem, self.log_entries_per_element, \
self.log_entries_per_block, False)
b = (rem - c).trunc_zeros(self.log_entries_per_element,
b = trunc_zeros(rem - c, self.log_entries_per_element,
self.log_entries_per_block)
if self.small:
return 0, b, c
else:
return (index - rem).trunc_zeros(self.log_entries_per_block,
return trunc_zeros(index - rem, self.log_entries_per_block,
log2(self.size)), b, c
else:
index_bits = bit_decompose(index, log2(self.size))
Expand Down
Loading

0 comments on commit 0f656fa

Please sign in to comment.