Skip to content

Commit

Permalink
add kmer match and query ratio search options
Browse files Browse the repository at this point in the history
  • Loading branch information
zorino committed Jul 17, 2020
1 parent ba58948 commit a00c02f
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 12 deletions.
18 changes: 18 additions & 0 deletions api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ func searchFastq(w http.ResponseWriter, r *http.Request) {
OutFormat: "tsv",
MaxResults: 10,
ExtractPositions: false,
MinKMatch: 10,
MinKRatio: 0.05,
SubMatrix: "blosum62",
GapOpen: 11,
GapExtend: 1,
Expand All @@ -170,6 +172,8 @@ func searchNucleotide(w http.ResponseWriter, r *http.Request) {
OutFormat: "tsv",
MaxResults: 10,
ExtractPositions: false,
MinKMatch: 10,
MinKRatio: 0.05,
SubMatrix: "blosum62",
GapOpen: 11,
GapExtend: 1,
Expand All @@ -196,6 +200,8 @@ func searchProtein(w http.ResponseWriter, r *http.Request) {
OutFormat: "tsv",
MaxResults: 10,
ExtractPositions: false,
MinKMatch: 10,
MinKRatio: 0.05,
SubMatrix: "blosum62",
GapOpen: 11,
GapExtend: 1,
Expand Down Expand Up @@ -278,6 +284,18 @@ func parseSearchOptions(searchOpts *search.SearchOptions, w http.ResponseWriter,
searchOpts.Align = true
}

if r.FormValue("minkmatch") != "10" {
if minKMatch, err := strconv.ParseInt(r.FormValue("minkmatch"), 10, 64); err == nil {
searchOpts.MinKMatch = minKMatch
}
}

if r.FormValue("minkratio") != "0.05" {
if minKRatio, err := strconv.ParseFloat(r.FormValue("minkratio"), 64); err == nil {
searchOpts.MinKRatio = minKRatio
}
}

if strings.ToLower(r.FormValue("sub-matrix")) != "blosum62" {
searchOpts.SubMatrix = strings.ToLower(r.FormValue("sub-matrix"))
}
Expand Down
7 changes: 7 additions & 0 deletions cmd/kaamer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ func main() {
// aln options
-mink minimum number of k-mer matches to report a hit (default: 10)
-minr minimum ratio of query k-mer matches to report a hit (default: 0.05)
-mat substitution matrix (default: BLOSUM62)
-gop gap open penalty (default: 11)
-gex gap extension penalty (default: 1)
Expand All @@ -90,6 +93,8 @@ func main() {
var addAnnotation = flag.Bool("ann", false, "add annotation flag")
var addPositions = flag.Bool("pos", false, "add position flag")

var minKMatch = flag.Int64("mink", 10, "minimum number of k-mer matches to report a hit")
var minKRatio = flag.Float64("minr", 0.05, "minimum ratio of query k-mer matches to report a hit")
var subMatrix = flag.String("mat", "blosum62", "substitution matrix")
var gapOpen = flag.Int("gop", 11, "gap open penalty")
var gapExtend = flag.Int("gex", 1, "gap extension penalty")
Expand Down Expand Up @@ -163,6 +168,8 @@ func main() {
options.Align = *addAlignment
options.ExtractPositions = *addPositions
options.Annotations = *addAnnotation
options.MinKMatch = *minKMatch
options.MinKRatio = *minKRatio
options.SubMatrix = *subMatrix
options.GapOpen = *gapOpen
options.GapExtend = *gapExtend
Expand Down
16 changes: 10 additions & 6 deletions docs/client.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ The kaamer CLI is a client to query (-search) a kaamer database.

// aln options

-mink minimum number of k-mer matches to report a hit (default: 10)
-minr minimum ratio of query k-mer matches to report a hit (default: 0.05)

-mat substitution matrix (default: BLOSUM62)
-gop gap open penalty (default: 11)
-gex gap extension penalty (default: 1)
Expand All @@ -67,7 +70,7 @@ The kaamer CLI is a client to query (-search) a kaamer database.
* -g Genetic Code

Genetic code number for translated search (with -t fastq or -t nt) \
One of the following : 1-15 except 7,8 (default 11 - bacteria) \
One of the following : 1-15 except 7,8 (default: 11 - bacteria) \
See https://www.bioinformatics.org/JaMBW/2/3/TranslationTables.html

* -i Input File
Expand All @@ -76,7 +79,7 @@ The kaamer CLI is a client to query (-search) a kaamer database.

* -m Max Results

Maximum number of results to return (default 10)
Maximum number of results to return (default: 10)

* -o Output

Expand All @@ -92,18 +95,19 @@ The kaamer CLI is a client to query (-search) a kaamer database.

* -ann Hit Annotations

Add hit annotations output (default false)
Add hit annotations output (default: false)

* -pos Positions Match

Add the positions that has a match with the hit (default false)
Add the positions that have a match with the hit (default: false)

##### Alignment Options

* -mink Minimum number of k-mer matches to report a hit (default: 10)
* -minr Minimum ratio of query k-mer matches to report a hit (default: 0.05)

* -mat Substitution matrix (default: BLOSUM62)

* -gop Gap open penalty (default: 11)

* -gex Gap extension penalty (default: 1)


Expand Down
12 changes: 8 additions & 4 deletions pkg/search/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ const (
)

var (
dbStats kvstore.KStats
kMatchRatio = 0.05 // at least 5% of kmer hits (on query)
minKMatch = int64(10) // at least 10 kmer hits
dbStats kvstore.KStats
// kMatchRatio = 0.05 // at least 5% of kmer hits (on query)
// minKMatch = int64(10) // at least 10 kmer hits
)

type SearchOptions struct {
Expand All @@ -66,6 +66,8 @@ type SearchOptions struct {
SubMatrix string
GapOpen int
GapExtend int
MinKMatch int64
MinKRatio float64
}

type SearchResults struct {
Expand Down Expand Up @@ -186,11 +188,13 @@ func NewSearchResult(searchOptions SearchOptions, _dbStats kvstore.KStats, kvSto

func (queryResult *QueryResult) FilterResults(searchOptions SearchOptions) {

fmt.Printf("KmerMatch: %d | KmerRatio: %f \n", searchOptions.MinKMatch, searchOptions.MinKRatio)

var hitsToDelete []uint32
var lastGoodHitPosition = len(queryResult.SearchResults.Hits) - 1

for i, hit := range queryResult.SearchResults.Hits {
if (float64(hit.Kmatch)/float64(queryResult.Query.SizeInKmer)) < kMatchRatio || hit.Kmatch < minKMatch {
if (float64(hit.Kmatch)/float64(queryResult.Query.SizeInKmer)) < searchOptions.MinKRatio || hit.Kmatch < searchOptions.MinKMatch {
if lastGoodHitPosition == (len(queryResult.SearchResults.Hits) - 1) {
lastGoodHitPosition = i - 1
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/search/search_fastq.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func FastqSearch(searchOptions SearchOptions, kvStores *kvstore.KVStores, nbOfTh
wgMP.Wait()

searchRes.Hits = sortMapByValue(searchRes.Counter.GetCountersMap())
if len(searchRes.Hits) > 0 && searchRes.Hits[0].Kmatch >= minKMatch {
if len(searchRes.Hits) > 0 && searchRes.Hits[0].Kmatch >= searchOptions.MinKMatch {
qR = QueryResult{Query: q, SearchResults: searchRes, HitEntries: map[uint32]kvstore.Protein{}}
SetBestStartCodon(&qR)
qR.FilterResults(searchOptions)
Expand Down
2 changes: 1 addition & 1 deletion pkg/search/search_nucleotide.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ func NucleotideSearch(searchOptions SearchOptions, kvStores *kvstore.KVStores, n
wgMP.Wait()

searchRes.Hits = sortMapByValue(searchRes.Counter.GetCountersMap())
if len(searchRes.Hits) > 0 && searchRes.Hits[0].Kmatch >= minKMatch {
if len(searchRes.Hits) > 0 && searchRes.Hits[0].Kmatch >= searchOptions.MinKMatch {
qR := QueryResult{Query: q, SearchResults: searchRes, HitEntries: map[uint32]kvstore.Protein{}}
SetBestStartCodon(&qR)
qR.FilterResults(searchOptions)
Expand Down
2 changes: 2 additions & 0 deletions pkg/searchcli/searchcli.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ func NewSearchRequest(options SearchRequestOptions) {
bodyWriter.WriteField("align", strconv.FormatBool(options.Align))
bodyWriter.WriteField("annotations", strconv.FormatBool(options.Annotations))
bodyWriter.WriteField("positions", strconv.FormatBool(options.ExtractPositions))
bodyWriter.WriteField("minkmatch", strconv.FormatInt(options.MinKMatch, 10))
bodyWriter.WriteField("minkratio", fmt.Sprintf("%f", options.MinKRatio))
bodyWriter.WriteField("sub-matrix", options.SubMatrix)
bodyWriter.WriteField("gap-open", strconv.Itoa(options.GapOpen))
bodyWriter.WriteField("gap-extend", strconv.Itoa(options.GapExtend))
Expand Down

0 comments on commit a00c02f

Please sign in to comment.