This repository has been archived by the owner on Oct 12, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 24
/
sts_model1.py
71 lines (55 loc) · 2.46 KB
/
sts_model1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# coding: utf8
import stst
import sts_tools
# Model definition: a gradient-boosting regressor wrapped in stst.Classifier
# and registered under the model name 'S1-gb'.
gb = stst.Classifier(stst.GradientBoostingRegression())
model = stst.Model('S1-gb', gb)
# Disabled alternative (average ensemble):
# avg = stst.Classifier(stst.AverageEnsemble())
# model = stst.Model('S1-avg', avg)

# Feature registration. Each feature is constructed right before it is added,
# and the registration order below is fixed (presumably it determines the
# feature layout seen by the regressor -- confirm against stst.Model).

# --- n-gram overlap features ---
for _token_type in ('word', 'lemma'):
    model.add(stst.nGramOverlapFeature(type=_token_type))
for _use_stopwords in (False, True):
    model.add(stst.nCharGramOverlapFeature(stopwords=_use_stopwords))
for _token_type in ('word', 'lemma'):
    model.add(stst.nGramOverlapBeforeStopwordsFeature(type=_token_type))
for _token_type in ('word', 'lemma'):
    model.add(stst.WeightednGramOverlapFeature(type=_token_type))

# --- bag-of-words features ---
for _use_stopwords in (False, True):
    model.add(stst.BOWFeature(stopwords=_use_stopwords))

# --- alignment features ---
model.add(stst.AlignmentFeature())
model.add(stst.IdfAlignmentFeature())
model.add(stst.PosAlignmentFeature())

# --- word-embedding features ---
# Paths to the pre-trained embedding files on the original author's machine.
word2vec_file = '/home/junfeng/word2vec/GoogleNews-vectors-negative300.bin'
paragram_file = '/home/junfeng/paragram-embedding/paragram_300_sl999.txt'
glove100_file = '/home/junfeng/GloVe/glove.6B.100d.txt'
glove300_file = '/home/junfeng/GloVe/glove.840B.300d.txt'

# One row per embedding: (name, size argument, file path, extra keyword args).
# Only the word2vec file is passed binary=True; the others use the callee's
# default for that argument.
_embedding_specs = (
    ('word2vec', 300, word2vec_file, {'binary': True}),
    ('paragram', 300, paragram_file, {}),
    ('glove100', 100, glove100_file, {}),
    ('glove300', 300, glove300_file, {}),
)
for _emb_name, _emb_size, _emb_path, _emb_kwargs in _embedding_specs:
    model.add(
        stst.MinAvgMaxEmbeddingFeature(
            _emb_name, _emb_size, _emb_path, **_emb_kwargs
        )
    )

# --- dependency and POS features ---
for _convey in ('count', 'idf'):
    model.add(stst.DependencyGramFeature(convey=_convey))
for _convey in ('count', 'idf'):
    model.add(stst.DependencyRelationFeature(convey=_convey))
for _use_stopwords in (True, False):
    model.add(stst.POSLemmaMatchFeature(stopwords=_use_stopwords))
model.add(
    stst.POSNounEmbeddingFeature('word2vec', 300, word2vec_file, binary=True)
)
model.add(stst.POSNounEditFeature())
model.add(stst.POSTreeKernelFeature())
model.add(stst.Doc2VecGlobalFeature())

# Features that are currently switched off:
# model.add(stst.AsiyaMTFeature())
# model.add(stst.SequenceFeature())
# model.add(stst.SentenceFeature())
# model.add(stst.ShortSentenceFeature())

# --- negation feature (disabled) ---
# model.add(stst.NegativeFeature())

# Optional diagnostic, disabled:
# sts_tools.feature_importance(model)

# Run the pipeline on the configured model: train, then dev, then test.
sts_tools.train_sts(model)
sts_tools.dev_sts(model)
sts_tools.test_sts(model)