Advertising
- Stuff
- Wednesday, May 23rd, 2012 at 4:55:35pm MDT
- package TDC.Compression.Arithmetic;
- import java.io.IOException;
- import java.util.HashMap;
- /**
- * Model of a sequence of tokens. Limited to 256 distinct tokens (to enable
- * coding as PPM).
- *
- * Token indices are coded through a token-sequence PPM model created in the
- * constructor. When a token is not yet in the token map, the next free index
- * is coded first and then the bytes from token.getBytes(LATIN1) are coded
- * through the token-bytes model supplied by the caller, followed by a 0
- * separator.
- *
- * NOTE(review): in this copy the method signature lines for the bodies below
- * are missing, and the _tokenToSymbolMap field and LATIN1 constant that the
- * code references are not declared here; restore them from the original
- * source before compiling.
- *
- * @author Inna Tuzhikova
- */
- public final class TokenStreamEncoder {
- public TokenStreamEncoder(ArithEncoder encoder,
- int tokenSequenceOrder,
- PPMModel tokenBytesModel) {
- _encoder = encoder;
- _tokenBytesModel = tokenBytesModel;
- _tokenSequenceModel = new PPMModel(tokenSequenceOrder); // sequence model is owned by this encoder; its order comes from the caller
- }
- if (_tokenToSymbolMap.containsKey(token)) { // NOTE(review): this branch is empty in this copy; presumably the known token's symbol is encoded here -- confirm against the original
- } else {
- encodeToken(_nextTokenIndex);
- // increment only after encodeToken, because encodeToken reads
- // _nextTokenIndex
- ++_nextTokenIndex;
- encodeBytes(token.getBytes(LATIN1));
- }
- }
- for (int i = _nextTokenIndex + 1; i < 256; ++i) { // exclude every symbol above the next token index before coding
- _tokenSequenceModel.exclude(i);
- }
- encode(_tokenSequenceModel, symbol);
- }
- for (int i = 0; i < bytes.length; ++i) {
- _tokenBytesModel.exclude(LATIN1_UNUSED_BYTES); // exclusion is re-applied before every coded byte
- encode(_tokenBytesModel, Converter.byteToInteger(bytes[i]));
- }
- _tokenBytesModel.exclude(LATIN1_UNUSED_BYTES);
- // byte value 0 is coded as the end-of-token separator, which can be
- // trouble if 0 is a valid character inside a token
- encode(_tokenBytesModel, 0);
- }
- while (model.escaped(symbol)) {
- // have already done complete walk to compute escape;
- // emit one ESCAPE interval per iteration while the model reports
- // the symbol as escaped
- model.interval(ArithCodeModel.ESCAPE, _interval);
- _encoder.encode(_interval);
- }
- // have already done walk to element to compute escape;
- // now emit the interval for the symbol itself
- model.interval(symbol, _interval);
- _encoder.encode(_interval);
- }
- /**
- * Arithmetic encoder used for encoding symbols and the bytes making them
- * up.
- */
- private final ArithEncoder _encoder;
- /**
- * Interval used for coding ranges. A single three-element buffer that is
- * filled by the model and then passed to the arithmetic encoder.
- */
- private final int[] _interval = new int[3];
- /**
- * Index of next token, which must fall between 0 and 255 inclusive.
- * Incremented each time a token that is not yet in the token map is
- * encoded.
- */
- private int _nextTokenIndex = 0;
- /**
- * Model for the bytes making up the tokens.
- */
- private final PPMModel _tokenBytesModel;
- /**
- * Model for the sequence of tokens, encoded as bytes, making up the token
- * stream.
- */
- private final PPMModel _tokenSequenceModel;
- /**
- * Byte values excluded from the token-bytes model before each byte is
- * coded. The static initializer below adds 1-8, 11-12, 14-31 and 127-159;
- * 0, 9, 10 and 13 are not added.
- *
- * NOTE(review): the comment originally at this position read "Maps each
- * token string to an Integer used to encode it." It presumably documented
- * the _tokenToSymbolMap field, whose declaration is missing from this copy.
- */
- private static final ByteSet LATIN1_UNUSED_BYTES = new ByteSet();
- static {
- for (int i = 1; i <= 8; ++i) {
- LATIN1_UNUSED_BYTES.add(i);
- }
- for (int i = 11; i <= 12; ++i) {
- LATIN1_UNUSED_BYTES.add(i);
- }
- for (int i = 14; i <= 31; ++i) {
- LATIN1_UNUSED_BYTES.add(i);
- }
- for (int i = 127; i <= 159; ++i) {
- LATIN1_UNUSED_BYTES.add(i);
- }
- }
- }
advertising
Update the Post
Either update this post and resubmit it with changes, or make a new post.
You may also comment on this post.
Please note that information posted here will expire by default in one month. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.