#!/usr/bin/ruby#All the requirementsrequire "cgi"require "net/http"require "uri"require "date"def cutlink(href) h_array = href.split(//) link = ""; inlink = false; 0.upto(h_array.length-1) { |i| if(inlink) then if(h_array[i] == "\"") then inlink = false; break; end if(h_array[i] == "\n") then next end if(h_array[i] == "'") then next end link += h_array[i] next end if(h_array[i] == "\"") then inlink = true end } return linkenddef cutlinks(body) linkarray = "" bodyarray = body.split(/ /) 0.upto(bodyarray.length-1) { |i| if(bodyarray[i].to_s[0..3].downcase == "href") then #the @ character is used to split this into an array of links linkarray += cutlink(bodyarray[i]).downcase+"@" end } return linkarrayenddef sortlinks(links) newlinks = [] linkcount = 0 0.upto(links.length-1) { |i| if(links[i].include? "search?q=cache:") then next end #if it's part of google caching system if(links[i][0..0] == "/") then next end #if it's a local link if(links[i].include? "google") then next end #if it's something from google (mail, docs, etc) if(links[i].include? "\n") then next end #links shouldn't have newline characters if(links[i].include? "froogle") then next end #links shouldn't be store items if(links[i].include? "Froogle") then next end #links shouldn't be store items if(links[i].include? "cm_mmc=seo-_-feeds") then next end if(links[i].include? "youtube.com/results?q") then next end #links shouldn't be youtube searches if(links[i] == nil) then next end #if the link is null if(links[i].include? " ") then next end #links shouldn't have spaces if(links[i][7..9] == "209") then next end #these were used when I manually set google's cache servers if(links[i][7..9] == "64.") then next end #changed in favor of the "search?q=cache:" if(links[i][7..9] == "72.") then next end if(links[i][0..4] == "https") then next end #Links shouldn't use secure socket layer newlinks[linkcount] = links[i] #if it passes the above, it's added to the link array linkcount += 1 } return newlinksenddef google(term) url="http://www.google.com/search?q="+term+"&num=10&hl=en&lr=&as_qdr=all&start=0&sa=N" body = "" #Iterates through the url for the specified searches #may unroll this to default to all (1000) results #may also thread this to speed it up a bit request = Net::HTTP.get_response(URI.parse(url)) body = request.body return bodyend#Program start#host = "localhost"#user = "minehowe_default"#pass = "babyoil"#db = "minehowe_keywords"#cgi = CGI.new#fname = cgi['filename']fname = "keywordsforben.txt"domain = "wikipedia.org"source = File.open("../uploads/"+fname, "r")sink = File.open("output.csv","w")terms = []i = 0;term = source.getswhile term != nil terms[i] = term i += 1 term = source.getsend0.upto(terms.length-1){ |j| terms[j].chomp search_array = terms[j].split(//) terms[j] = "" 0.upto(search_array.length-1) { |i| if(search_array[i] == " ") then terms[j] += "+" #this adds the required "+" char in between the terms next end terms[j] += search_array[i] }}bodys = []0.upto(terms.length-1) { |j| bodys[j] = google(terms[j])}links = []print "Content-type: text/html\n\n"0.upto(terms.length-1) { |j| links[j] = cutlinks(bodys[j]) links[j] = links[j].split(/@/) links[j] = sortlinks(links[j]) links[j].uniq!}final = []place = 00.upto(terms.length-1) { |j| linkarray = links[j] flink = "not found" 9.downto(0) {|k| if(linkarray[k] != nil) then if(linkarray[k].include? domain) then flink = linkarray[k] place = k+1 end end } final[j] = terms[j].chomp+","+flink+","+place.to_s# print flink+"<br />\n"# formatted = terms[j]+","+flink+","+place.to_s# print formatted+"<br />"}0.upto(terms.length-1) { |j| print final[j]+"\n"}