All pastes #964044 Raw Edit

Miscellany

public text v1 · immutable
#964044 ·published 2008-03-31 00:16 UTC
rendered paste body
//counts the words in a text file... 
//combined effort: wlfshmn from #java on IRC Undernet 
//and RAZII 
import java.io.*;
import java.util.*;
import java.nio.*; 
import java.nio.channels.*;
/**
 * Counts lines, words and bytes for each file named on the command line and
 * prints a frequency table of every distinct word seen.
 *
 * <p>A "word" is a maximal run of ASCII letters ({@code a-z}, {@code A-Z});
 * every other byte, including {@code '\n'}, acts as a delimiter. Words are
 * case-sensitive. Files are read byte by byte through a read-only memory
 * mapping, so no character decoding is performed.
 *
 * <p>State is kept in static fields, so totals and the dictionary accumulate
 * across repeated calls to {@link #main(String[])} within one JVM.
 */
public final class WordCount3
{
    /** Occurrence count per distinct word; the one-element array is a mutable counter. */
    private static final Map<String, int[]> dictionary =
            new HashMap<String, int[]>(15000);
    private static int tWords = 0;
    private static int tLines = 0;
    private static long tBytes = 0;

    /**
     * Program entry point.
     *
     * @param args paths of the files to count; arguments that do not name a
     *             regular file are skipped silently
     * @throws Exception if a file cannot be opened, mapped or closed
     */
    public static void main(final String[] args) throws Exception
    {
        System.out.println("Lines\tWords\tBytes\tFile\n");

        // Timing starts here.
        final long start = System.currentTimeMillis();
        for (String arg : args)
        {
            if (!new File(arg).isFile())
            {
                continue;
            }
            countFile(arg);
        }

        // Copying into a TreeMap only so the results print in sorted order.
        // This could be moved to the printing phase to keep it out of the
        // measured time.
        final TreeMap<String, int[]> sort = new TreeMap<String, int[]>(dictionary);

        // Timing ends here.
        final long end = System.currentTimeMillis();

        System.out.println("---------------------------------------");
        if (args.length > 1)
        {
            System.out.println(tLines + "\t" + tWords + "\t" + tBytes + "\tTotal");
            System.out.println("---------------------------------------");
        }
        for (Map.Entry<String, int[]> pairs : sort.entrySet())
        {
            System.out.println(pairs.getValue()[0] + "\t" + pairs.getKey());
        }
        System.out.println("Time: " + (end - start) + " ms");
    }

    /**
     * Scans one file, prints its "lines, words, bytes, name" row and adds the
     * counts to the running totals and the word dictionary.
     *
     * <p>The whole file is mapped in a single call, so it must not be larger
     * than {@code Integer.MAX_VALUE} bytes (a limit of {@code FileChannel.map}).
     *
     * @param path path of the file to scan
     * @throws IOException if the file cannot be opened, mapped or closed
     */
    private static void countFile(final String path) throws IOException
    {
        int numLines = 0;
        int numWords = 0;
        long numBytes;

        final FileInputStream stream = new FileInputStream(path);
        try
        {
            final FileChannel channel = stream.getChannel();
            numBytes = channel.size();
            final ByteBuffer in = channel.map(
                    FileChannel.MapMode.READ_ONLY, 0, numBytes);

            final StringBuilder word = new StringBuilder();
            // get() is a relative read that advances the buffer by one byte,
            // so loop until the buffer is exhausted to visit every byte.
            while (in.hasRemaining())
            {
                // Mask to 0..255 so bytes >= 0x80 are not sign-extended.
                final char c = (char) (in.get() & 0xFF);
                if (isLetter(c))
                {
                    word.append(c);
                    continue;
                }
                if (c == '\n')
                {
                    numLines++;
                }
                // Any non-letter, newline included, ends the current word.
                if (word.length() > 0)
                {
                    recordWord(word);
                    numWords++;
                }
            }
            // A word that runs up to end-of-file has no trailing delimiter.
            if (word.length() > 0)
            {
                recordWord(word);
                numWords++;
            }
        }
        finally
        {
            // Closing the stream also closes its channel.
            stream.close();
        }

        System.out.println(numLines + "\t" + numWords + "\t" + numBytes + "\t" + path);
        tLines += numLines;
        tWords += numWords;
        tBytes += numBytes;
    }

    /** Returns true when {@code c} is an ASCII letter. */
    private static boolean isLetter(final char c)
    {
        return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
    }

    /** Increments the dictionary count for the buffered word, then empties the buffer. */
    private static void recordWord(final StringBuilder word)
    {
        final String key = word.toString();
        final int[] count = dictionary.get(key);
        if (count != null)
        {
            count[0]++;
        }
        else
        {
            dictionary.put(key, new int[] {1});
        }
        word.setLength(0);
    }
}