Part of Slepp's ProjectsPastebinTURLImagebinFilebin
Feedback -- English French German Japanese
Create Upload Newest Tools Donate
Sign In | Create Account

Advertising

Mine
Wednesday, May 23rd, 2012 at 4:52:55pm MDT 

  1. package TDC.Compression.Arithmetic;
  2.  
  3. /**
  4. * Model of algorithm. PPM
  5. * Prediction by partial matching (PPM) is an adaptive statistical
  6. * data compression technique based on context modeling and prediction.
  7. * PPM models use a set of previous symbols in the uncompressed symbol
  8. * stream to predict the next symbol in the stream.
  9. * @author Inna Tuzhikova
  10. */
  11. public final class PPMModel implements ArithCodeModel {
  12.  
  13.     /**
  14.      * Construct a new model with the specified maximum length of context to use
  15.      * for prediction.
  16.      *
  17.      * @param maxContextLength Maximum length of context to use for prediction.
  18.      */
  19.     public PPMModel(int maxContextLength) {
  20.         _maxContextLength = maxContextLength;
  21.         _buffer = new ByteBuffer(maxContextLength + 1);
  22.     }
  23.  
  24.     // specified in ArithCodeModel
  25.     @Override
  26.     public boolean escaped(int symbol) {
  27.         return (_contextNode != null
  28.                 && (symbol == ArithCodeModel.EOF
  29.                 || !_contextNode.hasDaughter(symbol)));
  30.     }
  31.  
  32.     // specified in ArithCodeModel
  33.     @Override
  34.     public void exclude(int i) {
  35.         _excludedBytes.add(i);
  36.     }
  37.  
  38.     // specified in ArithCodeModel
  39.     @Override
  40.     public void interval(int symbol, int[] result) {
  41.         if (symbol == ArithCodeModel.EOF) {
  42.             _backoffModel.interval(EOF, result, _excludedBytes);
  43.         } else if (symbol == ArithCodeModel.ESCAPE) {
  44.             intervalEscape(result);
  45.         } else {
  46.             intervalByte(symbol, result);
  47.         }
  48.     }
  49.  
  50.     // specified in ArithCodeModel
  51.     @Override
  52.     public int pointToSymbol(int count) {
  53.         if (_contextNode != null) {
  54.             return _contextNode.pointToSymbol(count, _excludedBytes);
  55.         }
  56.         return _backoffModel.pointToSymbol(count, _excludedBytes);
  57.     }
  58.  
  59.     // specified in ArithCodeModel
  60.     @Override
  61.     public int totalCount() {
  62.         if (_contextNode == null) {
  63.             return _backoffModel.totalCount(_excludedBytes);
  64.         }
  65.         return _contextNode.totalCount(_excludedBytes);
  66.     }
  67.  
  68.     // specified in ArithCodeModel
  69.     @Override
  70.     public void increment(int i) {
  71.         increment(Converter.integerToByte(i));
  72.     }
  73.  
  74.     /**
  75.      * Exclude all of the bytes in the specified byte set.
  76.      *
  77.      * @param bytesToExclude Set of bytes to exclude from outcome.
  78.      * @since 1.1
  79.      */
  80.     public void exclude(ByteSet bytesToExclude) {
  81.         _excludedBytes.add(bytesToExclude);
  82.     }
  83.     /**
  84.      * Count of bytes coded to use in pruning.
  85.      */
  86.     // private int _byteCount; // implied = 0; uncomment for pruning
  87.     /**
  88.      * Model to use for short contexts.
  89.      */
  90.     private final ExcludingAdaptiveUnigramModel _backoffModel =
  91.             new ExcludingAdaptiveUnigramModel();
  92.     /**
  93.      * Nodes at depth 1 in the model. All order 0 nodes are included in the
  94.      * unigram
  95.      */
  96.     private final PPMNode[] _contexts = new PPMNode[256];
  97.     /**
  98.      * Maximum context length to search in trie. Maximum count will be for
  99.      * maximum context length plus one.
  100.      */
  101.     private final int _maxContextLength;
  102.     /**
  103.      * Root of the trie structure of counts. Dummy byte as symbol.
  104.      */
  105.     private final PPMNode _rootNode = new PPMNode((byte) '.');
  106.     /**
  107.      * Current context length.
  108.      */
  109.     private int _contextLength; // implied = 0;
  110.     /**
  111.      * Current context node.
  112.      */
  113.     private PPMNode _contextNode; // = null;
  114.     /**
  115.      * Bytes buffered for use as context.
  116.      */
  117.     private final ByteBuffer _buffer;
  118.     /**
  119.      * Storage for the excluded bytes
  120.      */
  121.     private final ByteSet _excludedBytes = new ByteSet();
  122.  
  123.     /**
  124.      * Returns interval for byte specified as an integer in 0 to 255 range.
  125.      *
  126.      * @param i Integer specification of byte in 0 to 255 range.
  127.      * @param result Array specifying cumulative probability for byte i.
  128.      */
  129.     private void intervalByte(int i, int[] result) {
  130.         if (_contextNode != null) {
  131.             _contextNode.interval(i, _excludedBytes, result);
  132.         } else {
  133.             _backoffModel.interval(i, result, _excludedBytes);
  134.         }
  135.         increment(i);
  136.     }
  137.  
  138.     /**
  139.      * Returns interval for escape in current context.
  140.      *
  141.      * @param result Array for specifying cumulative probability for escape
  142.      * symbol in current context.
  143.      */
  144.     private void intervalEscape(int[] result) {
  145.         _contextNode.intervalEscape(_excludedBytes, result);
  146.         if (_contextLength >= MIN_CONTEXT_LENGTH) {
  147.             for (PPMNode child = _contextNode._firstChild;
  148.                     child != null;
  149.                     child = child._nextSibling) {
  150.                 _excludedBytes.add(child._byte);
  151.             }
  152.         }
  153.         // could decrement longer contexts more for a speedup in some cases
  154.         --_contextLength;
  155.         getContextNodeLongToShort();
  156.     }
  157.  
  158.    
  159.     /**
  160.      * Adds counts for given byte to model in current context and then updates
  161.      * the current context. Rescales counts if necessary. Called by both
  162.      * encoding and deocding.
  163.      *
  164.      * @param b Byte to add to model.
  165.      */
  166.     private void increment(byte b) {
  167.         _buffer.buffer(b);
  168.         byte firstByte = _buffer.bytes()[_buffer.offset()];
  169.         if (_contexts[Converter.byteToInteger(firstByte)] == null) {
  170.             _contexts[Converter.byteToInteger(firstByte)] =
  171.                     new PPMNode(firstByte);
  172.         }
  173.         if (_buffer.length() > 1) {
  174.             _contexts[Converter.byteToInteger(firstByte)]
  175.                     .increment(_buffer.bytes(),
  176.                     _buffer.offset() + 1,
  177.                     _buffer.length() - 1);
  178.         }
  179.        
  180.         _contextLength = Math.min(_maxContextLength, _buffer.length());
  181.         getContextNodeBinarySearch();
  182.         _excludedBytes.clear();
  183.     }
  184.  
  185.     /**
  186.      * Use binary search to set the context node up to the currently specified
  187.      * context length. May set it to
  188.      * <code>null</code> if not found.
  189.      */
  190.     private void getContextNodeBinarySearch() {
  191.         int low = MIN_CONTEXT_LENGTH;
  192.         int high = _contextLength;
  193.         _contextLength = MIN_CONTEXT_LENGTH - 1; // not sure we need this
  194.         _contextNode = null;
  195.         boolean isDeterministic = false;
  196.         while (high >= low) {
  197.             int contextLength = (high + low) / 2;
  198.             PPMNode contextNode = lookupNode(contextLength);
  199.             if (contextNode == null
  200.                     || contextNode.isChildless(_excludedBytes)) {
  201.                 if (contextLength < high) {
  202.                     high = contextLength;
  203.                 } else {
  204.                     --high;
  205.                 }
  206.             } else if (contextNode.isDeterministic(_excludedBytes)) {
  207.                 _contextLength = contextLength;
  208.                 _contextNode = contextNode;
  209.                 isDeterministic = true;
  210.                 if (contextLength < high) {
  211.                     high = contextLength;
  212.                 } else {
  213.                     --high;
  214.                 }
  215.             } else if (!isDeterministic) {
  216.                 _contextLength = contextLength;
  217.                 _contextNode = contextNode;
  218.                 if (contextLength > low) {
  219.                     low = contextLength;
  220.                 } else {
  221.                     ++low;
  222.                 }
  223.             } else {
  224.                 if (contextLength > low) {
  225.                     low = contextLength;
  226.                 } else {
  227.                     ++low;
  228.                 }
  229.             }
  230.         }
  231.     }
  232.  
  233.     /*
  234.      * un-used variant lookung up context lengths by starting with shortest and
  235.      * continuing to increase until found. private void
  236.      * getContextNodeShortToLong() { int maxContextLength = _contextLength;
  237.      * _contextNode = null; _contextLength = MIN_CONTEXT_LENGTH-1; for (int
  238.      * contextLength = MIN_CONTEXT_LENGTH; contextLength <= maxContextLength;
  239.      * ++contextLength) { PPMNode node = lookupNode(contextLength); if (node ==
  240.      * null || node.isChildless(_excludedBytes)) { continue; // return; lose
  241.      * around .01 b/B total (not even average) with return, but 25% slower }
  242.      * _contextNode = node; _contextLength = contextLength; if
  243.      * (node.isDeterministic(_excludedBytes)) return; } }
  244.      */
  245.     /**
  246.      * Starting at the longest context, count down in length to set a valid
  247.      * context or give up. This version finds the shortest deterministic context
  248.      * <= in length to the current context length, but if there is no
  249.      * deterministic context, returns longest context length that exists that is
  250.      * <= in length to the current context. Could also implement this in short
  251.      * to long order
  252.      */
  253.     private void getContextNodeLongToShort() {
  254.         while (_contextLength >= MIN_CONTEXT_LENGTH) {
  255.             _contextNode = lookupNode(_contextLength);
  256.             if (_contextNode == null
  257.                     || _contextNode.isChildless(_excludedBytes)) {
  258.                 --_contextLength;
  259.                 continue;
  260.             }
  261.             while (_contextLength > MIN_CONTEXT_LENGTH
  262.                     && _contextNode.isDeterministic(_excludedBytes)) {
  263.                 // backoff to shortest deterministic node
  264.                 //if context node is deterministic
  265.                 PPMNode backoffNode = lookupNode(_contextLength - 1);
  266.                 if (backoffNode == null
  267.                         || !backoffNode.isDeterministic(_excludedBytes)) {
  268.                     return;
  269.                 }
  270.                 _contextNode = backoffNode;
  271.                 --_contextLength;
  272.             }
  273.             return;
  274.         }
  275.         _contextNode = null;
  276.     }
  277.  
  278.     /**
  279.      * Returns node from the current byte buffer of the specified context
  280.      * length, or null if there isn't one.
  281.      *
  282.      * @param contextLength Number of bytes of context used.
  283.      * @return Node found at that context.
  284.      */
  285.     private PPMNode lookupNode(int contextLength) {
  286.         PPMNode node = _contexts[Converter.byteToInteger(
  287.                 _buffer.bytes()[_buffer.offset()
  288.                 + _buffer.length()
  289.                 - contextLength])];
  290.         if (node == null) {
  291.             return (PPMNode) null;
  292.         }
  293.         return lookup(node, _buffer.bytes(),
  294.                 _buffer.offset() + _buffer.length() - contextLength + 1,
  295.                 contextLength - 1);
  296.     }
  297.  
  298.     /**
  299.      * Looks up a node from the given bytes, offset and length starting from the
  300.      * specified node.
  301.      *
  302.      * @param node Node from which to search.
  303.      * @param bytes Sequence of bytes to search.
  304.      * @param offset Offset into sequence of bytes of the first byte.
  305.      * @param length Number of bytes to look up.
  306.      * @return Node found for the given bytes.
  307.      */
  308.     private static PPMNode lookup(PPMNode node, byte[] bytes, int offset,
  309.             int length) {
  310.         if (length == 0) {
  311.             return node;
  312.         }
  313.         for (PPMNode child = node._firstChild; length > 0 && child != null;) {
  314.             if (bytes[offset] == child._byte) {
  315.                 if (length == 1) {
  316.                     return child;
  317.                 }
  318.                 node = child;
  319.                 child = child._firstChild;
  320.                 ++offset;
  321.                 --length;
  322.             } else {
  323.                 child = child._nextSibling;
  324.             }
  325.         }
  326.         return (PPMNode) null;
  327.     }
  328.     /**
  329.      * Minimum context length to look down sequence of nodes. Shorter contexts
  330.      * use backoff model.
  331.      */
  332.     private static final int MIN_CONTEXT_LENGTH = 1;
  333.     }

advertising

Update the Post

Either update this post and resubmit it with changes, or make a new post.

You may also comment on this post.

update paste below
details of the post (optional)

Note: Only the paste content is required, though the following information can be useful to others.

Save name / title?

(space separated, optional)



Please note that information posted here will expire by default in one month. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.

worth-right
fantasy-obligation