Part of Slepp's Projects: Pastebin | TURL | Imagebin | Filebin
Feedback -- English French German Japanese
Create Upload Newest Tools Donate
Sign In | Create Account

Advertising

Stuff
Wednesday, May 23rd, 2012 at 4:55:35pm MDT 

  1. package TDC.Compression.Arithmetic;
  2.  
  3. import java.io.IOException;
  4. import java.util.HashMap;
  5.  
  6. /**
  7. * Model of a sequence of tokens. Limited to 256 distinct tokens (to enable
  8. * coding as PPM).
  9. *
  10. * @author Inna Tuzhikova
  11. */
  12. public final class TokenStreamEncoder {
  13.  
  14.     public TokenStreamEncoder(ArithEncoder encoder,
  15.             int tokenSequenceOrder,
  16.             PPMModel tokenBytesModel) {
  17.         _encoder = encoder;
  18.         _tokenBytesModel = tokenBytesModel;
  19.         _tokenSequenceModel = new PPMModel(tokenSequenceOrder);
  20.         _tokenToSymbolMap = new HashMap();
  21.     }
  22.  
  23.     public void encode(String token) throws IOException {
  24.         if (_tokenToSymbolMap.containsKey(token)) {
  25.             encodeToken(((Integer) (_tokenToSymbolMap.get(token))).intValue());
  26.         } else {
  27.             _tokenToSymbolMap.put(token, new Integer(_nextTokenIndex++));
  28.             encodeToken(_nextTokenIndex);
  29.             // must do after encodeToken, because encodeToken uses it
  30.             ++_nextTokenIndex;
  31.             encodeBytes(token.getBytes(LATIN1));
  32.         }
  33.     }
  34.  
  35.     private void encodeToken(int symbol) throws IOException {
  36.         for (int i = _nextTokenIndex + 1; i < 256; ++i) {
  37.             _tokenSequenceModel.exclude(i);
  38.         }
  39.         encode(_tokenSequenceModel, symbol);
  40.     }
  41.  
  42.     private void encodeBytes(byte[] bytes) throws IOException {
  43.         for (int i = 0; i < bytes.length; ++i) {
  44.             _tokenBytesModel.exclude(LATIN1_UNUSED_BYTES);
  45.             encode(_tokenBytesModel, Converter.byteToInteger(bytes[i]));
  46.         }
  47.         _tokenBytesModel.exclude(LATIN1_UNUSED_BYTES);
  48.         // uses 0 as separator, which can be trouble if 0 is a valid character
  49.         encode(_tokenBytesModel, 0);
  50.     }
  51.  
  52.     private void encode(PPMModel model, int symbol) throws IOException {
  53.         while (model.escaped(symbol)) {
  54.             // have already done complete walk to compute escape
  55.             model.interval(ArithCodeModel.ESCAPE, _interval);
  56.             _encoder.encode(_interval);
  57.         }
  58.         // have already done walk to element to compute escape
  59.         model.interval(symbol, _interval);
  60.         _encoder.encode(_interval);
  61.     }
  62.     /**
  63.      * Arithmetic encoder used for encoding symbols and the bytes making them
  64.      * up.
  65.      */
  66.     private final ArithEncoder _encoder;
  67.     /**
  68.      * Interval used for coding ranges.
  69.      */
  70.     private final int[] _interval = new int[3]
  71.    
  72.     /**
  73.      * Index of next token, which must fall between 0 and 255 inclusive.
  74.      */
  75.     private int _nextTokenIndex = 0;
  76.    
  77.     /**
  78.      * MOdel for the bytes making up the tokens.
  79.      */
  80.     private final PPMModel _tokenBytesModel;
  81.    
  82.     /**
  83.      * Model for the sequence of tokens, encoded as bytes, making up the token
  84.      * stream.
  85.      */
  86.     private final PPMModel _tokenSequenceModel;
  87.    
  88.     /**
  89.      * Maps each token string to an Integer used to encode it.
  90.      */
  91.     private final HashMap _tokenToSymbolMap;
  92.     private final static String LATIN1 = "ISO-8859-1";
  93.     private static final ByteSet LATIN1_UNUSED_BYTES = new ByteSet();
  94.  
  95.     static {
  96.         for (int i = 1; i <= 8; ++i) {
  97.             LATIN1_UNUSED_BYTES.add(i);
  98.         }
  99.         for (int i = 11; i <= 12; ++i) {
  100.             LATIN1_UNUSED_BYTES.add(i);
  101.         }
  102.         for (int i = 14; i <= 31; ++i) {
  103.             LATIN1_UNUSED_BYTES.add(i);
  104.         }
  105.         for (int i = 127; i <= 159; ++i) {
  106.             LATIN1_UNUSED_BYTES.add(i);
  107.         }
  108.     }
  109. }

advertising

Update the Post

Either update this post and resubmit it with changes, or make a new post.

You may also comment on this post.

update paste below
details of the post (optional)

Note: Only the paste content is required, though the following information can be useful to others.

Save name / title?

(space separated, optional)



Please note that information posted here will expire by default in one month. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.

worth-right
fantasy-obligation