
Commit

postpone samples tokenization
gereoffy committed Nov 8, 2023
1 parent 280fa63 commit ca64bc8
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions torch_eval.py
@@ -1,9 +1,10 @@
 #! /usr/bin/python3
 
-import time,os,sys
+import time,os,sys,glob
 from model import DeepSpam
 
 eval_bs=1024
+samples=None
 
 ds=DeepSpam(device="cuda",load=None,ds1=False)
 
@@ -14,20 +15,18 @@ def loadtokens(path):
   texts.append(t[:1024].split("|",1))
  return ds.tokenize(ds.preprocess(texts),ds.MAX_BLOCK)
 
-samples=[ loadtokens("SPAM-test.txt"), loadtokens("HAM-test.txt") ]
-
-
-for fnev in sys.argv[1:] if len(sys.argv)>1 else ["model/deepspam.pt"]:
+for fnev in sys.argv[1:] if len(sys.argv)>1 else glob.glob('model/deepspam*.pt'):
  ds.load(fnev)
  a=0;n=0
  ok=bad=0
  for text in open("Junk.txt","rt"):
   a+=(res:=ds(text.split("|",1))) ; n+=1
-  if len(sys.argv)<=2: print("%6.3f%%"%res,text[:128])
+  if len(sys.argv)==2: print("%6.3f%%"%res,text[:128])
   if res>80: ok+=1
   elif res<20: bad+=1
 # if len(sys.argv)>2: print("%d/%d avg: %5.3f [%s]"%(bad,ok,a/n,fnev)); continue
 # eval
+ if not samples: samples=[ loadtokens("SPAM-test.txt"), loadtokens("HAM-test.txt") ]
  t0=time.time()
  cnt={}
  for i in range(2):
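The change is a small lazy-initialization move: rather than tokenizing the SPAM/HAM test sets at module load, the script now sets samples=None and only builds it when the first model actually reaches the evaluation step, so tokenization runs at most once even when several checkpoints matched by glob are looped over. Below is a minimal standalone sketch of that pattern, not code from the repository; the load_test_samples helper and the checkpoint names are illustrative stand-ins.

# Lazy one-time initialization, as in the commit: the expensive step is
# deferred until first use, then the cached result is reused.
samples = None

def load_test_samples():
    # hypothetical stand-in for loadtokens("SPAM-test.txt") / loadtokens("HAM-test.txt")
    return ["tokenized spam set", "tokenized ham set"]

for checkpoint in ["model/a.pt", "model/b.pt"]:   # illustrative file names
    # ... per-checkpoint scoring of Junk.txt would happen here ...
    if not samples:                  # tokenize only once, and only if evaluation is reached
        samples = load_test_samples()
    # ... evaluate this checkpoint against the cached samples ...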
