-
Notifications
You must be signed in to change notification settings - Fork 0
/
msmClustering.py
103 lines (93 loc) · 2.95 KB
/
msmClustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
"""
Collection of basic clustering functions and utilities for Hi-C analysis
Part of ChromaWalker package
"""
import numpy as np
import os
import subprocess
import sys
#import matplotlib as mpl
#mpl.use('Agg')
import matplotlib.pyplot as plt
import random
import copy
import matplotlib.gridspec as gridspec
from time import time, sleep
from numpy.linalg import solve
import cPickle as pickle
import matplotlib.backends.backend_pdf
from matplotlib import cm
from matplotlib import collections as mc
#import mpldatacursor
import plotutils as plu
from scipy.stats import norm as statnorm
from scikits.bootstrap import bootstrap
from numpy.linalg import eigvals as eigvalsnp
from operator import itemgetter
import gzip
from Bio import SeqIO
from Bio.SeqUtils import GC
## Basic clustering operations
def _clusterproximity_00(set1, set2, ftot, sizes):
"""
Compute proximity of 2 clusters, by size-weighted interaction counts.
For basic clustering analysis only.
Input:
ftot: Array of total interactions between units
sizes: Sizes of units
Returns:
favgval: Size-weighted (averaged) interaction between the two sets
set1, set2
"""
s1 = np.array(set1)
s2 = np.array(set2)
ntot = np.sum(ftot[s1][:, s2])
favgval = ntot / (np.sum(sizes[s1]) * np.sum(sizes[s2]))
return favgval
def _clusterproximity_01(set1, set2, ftot, sizes):
"""
Compute proximity of 2 clusters, by minimum proximity
between constituent units.
For basic clustering analysis only.
Input:
ftot: Array of total interactions between units
sizes: Sizes of units (not used)
Returns:
fminval: Minimum interaction between units of the two sets
set1, set2
"""
s1 = np.array(set1)
s2 = np.array(set2)
fminval = np.min(ftot[s1][:, s2])
return fminval
def _clustersplit_minprox(favg, cluster, hubs):
"""
Compute min proximity to hubs for points in cluster.
Uses array favg as measure of proximity between individual units.
Note: hubs must only contain 2 points, and be a subset of cluster!
Input:
favg: Average interaction levels between units
cluster: Cluster of units to split
hubs: Proposed hubs for splitting cluster
Returns:
minprox: Min proximity from cluster units to hubs
splitting: (2,) list of indices closer to each hub
Note: Ties a awarded to the first hub
"""
# Check hubs
if len(hubs) != 2:
print 'Incorrect number of hubs: %i!' % len(hubs)
return
if not set(hubs).issubset(set(cluster)):
print 'hubs is not a subset of cluster!'
return
# Compute minprox
cl = np.array(cluster)
hb = np.array(hubs)
favgsub = favg[cl][:, hb]
minprox = np.min(favgsub)
# Find splitting
sub1 = list(cl[favgsub[:, 0] <= favgsub[:, 1]])
sub2 = list(cl[favgsub[:, 0] > favgsub[:, 1]])
return minprox, (sub1, sub2)