forked from gkipkorir/cs465_final_project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhw10.html
163 lines (116 loc) · 5.79 KB
/
hw10.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<!-- D3 is loaded over HTTPS so that it is not blocked as mixed content when this page is itself served over HTTPS. -->
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<title>HW 10</title>
</head>
<body>
<h1>Literature Fingerprints</h1>
<p>Use this page to upload a collection of different text documents and visualize their "fingerprints". To generate the fingerprint, the text files are broken up into shorter lists of words. For each subset, we draw a small box and set the color of the box based on the average word length for all of the words in that subset.
</p>
<input type="file" id="files" name="files[]" multiple />
<ul id="list"></ul>
<div id="texts"></div>
<script>
// constant to determine how the file gets segmented: the number of words
// that are averaged together to produce one box
var WORDS_PER_BLOCK = 200;
// constant to determine the size of the physical boxes (used for both the
// width and the height of a box)
var BLOCKSIZE = 15;
// width of the svg
var width = 800;
// to make the math easier, we compute the width the blocks will actually use:
// the largest multiple of BLOCKSIZE that fits in width, since we don't want
// any partial blocks at the end of a row
var realWidth = Math.floor(width/BLOCKSIZE) * BLOCKSIZE;
// stop words are words that we don't want in our analysis because they are
// too common; every entry is lower case and appears exactly once
var stopwords = [
    "and", "of", "the", "is", "a", "which", "where", "that", "or", "if",
    "what", "in", "on", "an", "it", "to", "too", "was", "has"
];
// create a color scale using a linear scale
// I used a three value range for a diverging color scale, but you are welcome to
// just use two if you prefer the aesthetics
/**************** FILL IN ******************/
/*
drawTextVis draws the fingerprints.

  texts - a list of objects, each representing one of the texts that has
          been uploaded

The body is still a skeleton: every numbered step below has to be filled in.
*/
function drawTextVis(texts) {
    // Step 1: build a master list of all of the average word lengths. This makes
    //         it easy to find the extent of all of the values and the median
    //         (a good middle value for the domain of a diverging color scale).
    /**************** FILL IN ******************/

    // Step 2: set the domain of the color scale.
    /**************** FILL IN ******************/

    // Step 3: create a div for each text document, using the enter and exit
    //         sets to clear out old values.
    /**************** FILL IN ******************/

    // Step 4: build the display for a single text document. Each div is bound
    //         to the data for its text document with data(). Inside the div
    //         there is a p, which displays the document's name, and an SVG
    //         region for the drawing.
    /**************** FILL IN ******************/

    // Step 5: size the SVG region. Its width is width; its height is calculated
    //         from the number of rows required.
    /**************** FILL IN ******************/

    // Step 6: inside of the SVG, create rects for the boxes, using the list of
    //         average lengths as the data. Because the rects live inside the
    //         div that holds the text document data, a function(d){} passed to
    //         data() can fetch the right field.
    /**************** FILL IN ******************/

    // Step 7: position and color the boxes.
    //         x coordinate: index number of the element, mod (%)
    //         y coordinate: index number of the element, div (/)
    //         width and height: BLOCKSIZE
    //         fill color: the color scale
    /**************** FILL IN ******************/
}
/*
The bundleTextData function takes in a name and the contents of the associated file.
It turns the text into a list of cleaned-up words, cuts that list into consecutive
segments of WORDS_PER_BLOCK words, and averages the word length within each segment.

Parameters:
  name - the name of the file; copied into the result unchanged
  text - the contents of the file as a single string

Returns an object of the form { name: <name>, averages: [<number>, ...] }.
averages holds one average word length per segment, in document order. The last
segment may hold fewer than WORDS_PER_BLOCK words. averages is empty when the
text contains no usable words.
*/
var bundleTextData = function(name, text){
    // Punctuation to strip: the four ASCII punctuation ranges plus the Unicode
    // dashes, curly quotes, bullets and ellipsis in U+2010..U+2027. Listing the
    // punctuation (instead of stripping "everything that is not \w") leaves
    // accented and other non-ASCII letters untouched.
    var punctuation = /[\u0021-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E\u2010-\u2027]/g;

    var words = String(text == null ? "" : text)
        // split on any run of whitespace
        .split(/\s+/)
        // strip the punctuation off each word
        .map(function(word){ return word.replace(punctuation, ""); })
        // remove words that are now empty (pure punctuation, or the empty
        // strings split() produces for leading/trailing whitespace) and the
        // stop words; the comparison is done in lower case so that capitalized
        // forms such as "The" are removed as well
        .filter(function(word){
            return word.length > 0 && stopwords.indexOf(word.toLowerCase()) === -1;
        });

    // walk the word list in steps of WORDS_PER_BLOCK; slice() clamps at the end
    // of the list, so the final segment is simply shorter, never empty
    var averages = [];
    for (var start = 0; start < words.length; start += WORDS_PER_BLOCK) {
        var segment = words.slice(start, start + WORDS_PER_BLOCK);
        var totalLength = segment.reduce(function(sum, word){
            return sum + word.length;
        }, 0);
        averages.push(totalLength / segment.length);
    }

    return { name: name, averages: averages };
};
/*
This function does the work of "uploading" the files into the browser.
It is the change handler for the file input. Every selected file is read
as text with its own FileReader, and the file's name and contents are passed
to bundleTextData(). When all of the files have been processed, drawTextVis()
is called once with the list of objects bundleTextData() produced, in the same
order as the selected files.

  evt - the change event; evt.target.files is the FileList of selected files

If any file cannot be read, or bundleTextData() throws, nothing is drawn and
the error is reported on the console.
*/
var handleFileSelect = function(evt) {
    // FileList is array-like but not an array; copy it so map() can be used
    var files = Array.prototype.slice.call(evt.target.files);

    var loadList = files.map(function(file){
        return new Promise(function(resolve, reject){
            var reader = new FileReader();
            reader.onload = function(event){
                // an exception thrown inside this callback would not reject
                // the promise on its own, so it is forwarded explicitly
                try {
                    resolve(bundleTextData(file.name, event.target.result));
                } catch (err) {
                    reject(err);
                }
            };
            // without this handler a failed read would leave the promise
            // pending forever and nothing would ever be drawn
            reader.onerror = function(){
                reject(reader.error || new Error("Could not read " + file.name));
            };
            reader.readAsText(file);
        });
    });

    Promise.all(loadList).then(function(data){
        drawTextVis(data);
    }).catch(function(err){
        console.error("Unable to load the selected files:", err);
    });
};
// Run handleFileSelect every time the selection in the file input (id "files") changes.
document.getElementById('files').addEventListener('change', handleFileSelect, false);
</script>
</body>
</html>