-
Notifications
You must be signed in to change notification settings - Fork 1
/
print-desktop-en-us-all-oses-increasing-ids-time-url-title-content.rb
executable file
·120 lines (110 loc) · 3.68 KB
/
print-desktop-en-us-all-oses-increasing-ids-time-url-title-content.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env ruby
require 'json'
require 'rubygems'
require 'awesome_print'
require 'json'
require 'time'
require 'date'
require 'csv'
require 'logger'
require 'nokogiri'
# Debug-level diagnostics are written to standard error so they never mix
# with anything printed on standard output.
logger = Logger.new(STDERR).tap { |l| l.level = Logger::DEBUG }

# Output mode (second argument) => [csv, markdown, markdown25] flag values.
mode_flags = {
  "csv"        => [true,  false, false],
  "markdown"   => [false, true,  false],
  "markdown25" => [false, true,  true]
}

# Both arguments are required and the mode must be one of the known keys;
# anything else prints the usage line and stops the script.
selected_flags = ARGV.length < 2 ? nil : mode_flags[ARGV[1]]
if selected_flags.nil?
  puts "usage: #{$0} [sumoquestions.csv] csv|markdown|markdown25"
  exit
end

# Path of the input CSV (first argument).
FILENAME = ARGV[0]
csv, markdown, markdown25 = selected_flags
# Read the input CSV (with a header row) and collect one array per question
# whose locale is "en-US" and whose product is "firefox". Each collected array
# holds, in order: id, created, url, title, content, tags, product, locale.
num_questions = 0
id_time_url_title_content_tags_array = []

# Maximum number of content characters kept before "..." is appended; depends
# only on the output mode, so it is computed once outside the loop.
max_content_length =
  if markdown
    280
  elsif csv
    1024
  end

CSV.foreach(FILENAME, :headers => true) do |row|
  logger.debug row['tags']
  logger.debug row['title']
  locale = row['locale']
  product = row['product']
  next unless locale == "en-US" && product == 'firefox'

  # Strip HTML markup, keeping only the text of the question body.
  content = Nokogiri::HTML.fragment(row['content']).text
  logger.debug 'CONTENT:' + content

  # Only add the ellipsis when characters were actually dropped (content of
  # exactly the maximum length is kept verbatim).
  if max_content_length && content.length > max_content_length
    content = content[0, max_content_length] + "..."
  end

  num_questions += 1
  id_time_url_title_content_tags_array.push(
    [
      row['id'].to_i,
      # Alternative kept for reference: format "created" as a date string.
      #Time.at(row["created"].to_i).strftime("%-m/%-d/%Y %H:%M:%S"), # 10/2/2019 20:34:35
      row["created"],
      "https://support.mozilla.org/questions/" + row['id'].to_s,
      # CSV yields nil for an empty field; to_s avoids a NoMethodError on
      # questions without a title.
      row['title'].to_s[0..79],
      content.tr("\n", " "),
      row["tags"],
      product,
      locale
    ])
end
logger.debug 'num_questions:' + num_questions.to_s

# Order the collected rows by question id (element 0 of each row).
sorted_array = id_time_url_title_content_tags_array.sort_by { |h| h[0] }

if markdown25
  # Keep a random 25% of the rows. The result is in shuffled order, not id
  # order. first(n) returns exactly n rows; an inclusive 0..n range would
  # return n + 1.
  sample_size = (sorted_array.length * 0.25).round
  sorted_array = sorted_array.shuffle.first(sample_size)
end

# Column names, in the same order as the elements of each row array.
headers = ['id', 'created', 'url', 'title', 'content', 'tags', 'product', 'locale']
# Write the selected rows either as a CSV file or as a Markdown table. The
# output file name is a fixed prefix followed by the input file argument.
if csv
  output_filename = "sorted-all-desktop-en-us-" + ARGV[0]
  CSV.open(output_filename, "w", write_headers: true, headers: headers) do |csv_object|
    sorted_array.each { |row_array| csv_object << row_array }
  end
elsif markdown
  prefix = markdown25 ? "25-percent-random-all-desktop-en-us-" : "sorted-all-desktop-en-us-"
  # Replace only a trailing ".csv" extension, not every ".csv" in the name.
  output_filename = (prefix + ARGV[0]).sub(/\.csv\z/, ".md")
  logger.debug 'markdown filename:' + output_filename

  # File.open instead of Kernel#open: the name comes from the command line,
  # and Kernel#open treats a leading "|" as a command to run.
  File.open(output_filename, 'w') do |f|
    f.puts "Number of questions:" + sorted_array.length.to_s + "\n\n"
    f.puts "| id:created | Title | Content | Tags | Notes | "
    f.puts "| --- | --- | --- | --- | --- |"

    sorted_array.each do |row_array|
      # Tags are a ';'-separated string; an empty CSV field is nil, so to_s
      # keeps untagged questions from raising.
      tags_array = row_array[5].to_s.split(';')
      logger.debug "tags_array" + tags_array.to_s

      # One Markdown link per tag, each followed by ';'.
      tag_links = tags_array.map do |t|
        logger.debug t
        "[" + t + "]" +
          "(https://support.mozilla.org/en-US/questions/firefox?tagged=" + t + ")" + ";"
      end
      tags_markdown = tag_links.empty? ? ";" : tag_links.join

      title = row_array[3].to_s.tr("\n", "")[0..79]
      content = row_array[4].to_s.tr("\n", "")
      summary = content[0..79]
      # nil when content is shorter than 80 characters, "" when it is exactly
      # 80; in both cases there is nothing to fold away.
      remainder = content[80..-1]

      # Short content is shown inline; longer content shows its first 80
      # characters as the summary of a collapsible <details> element.
      content_cell =
        if remainder.nil? || remainder.empty?
          " " + summary + " "
        else
          "<details><summary>" + summary + "</summary>" + remainder + "</details> "
        end

      # Both row shapes end with an empty Notes cell so every row has the
      # five columns declared in the table header.
      f.puts(
        sprintf("| [%d](%s)<br>%s | %s |%s| %s| |\n",
                row_array[0], row_array[2],
                row_array[1], title, content_cell,
                tags_markdown))
    end
  end
end