Part of Slepp's ProjectsPastebinTURLImagebinFilebin
Feedback -- English French German Japanese
Create Upload Newest Tools Donate
Sign In | Create Account

Advertising

Miscellany
Wednesday, May 23rd, 2012 at 4:54:03pm MDT 

  1. package TDC.Compression.Arithmetic;
  2.  
  3. /**
  4. *
  5. * @author Inna Tuzhikova
  6. */
  7. final class PPMNode {
  8.  
  9.     /**
  10.      * Construct a node with the specified byte and next sibling.
  11.      *
  12.      * @param b Byte represented by node.
  13.      * @param nextSibling The next daughter node in the list of daughters.
  14.      */
  15.     PPMNode(byte b, PPMNode nextSibling) {
  16.         _byte = b;
  17.         _nextSibling = nextSibling;
  18.     }
  19.  
  20.     /**
  21.      * Construct a node with the specified byte.
  22.      *
  23.      * @param b Byte represented by node.
  24.      */
  25.     PPMNode(byte b) {
  26.         this(b, null);
  27.     }
  28.  
  29.     /**
  30.      * Returns
  31.      * <code>true</code> if the number of children for this node is
  32.      * <code>1</code>.
  33.      *
  34.      * @param bytes Bytes that have been seen in escaped context that should not
  35.      * be considered children.
  36.      * @return
  37.      * <code>true</code> if the scaled number of outcomes for this node is
  38.      * <code>1</code>.
  39.      */
  40.     boolean isDeterministic(ByteSet excludedBytes) {
  41.         return _firstChild._nextSibling == null;
  42.         /*
  43.          * doing it right is about 10-12% slower and less than .01 b/B better
  44.          * int numOutcomes = 0; for (PPMNode node = _firstChild; node != null;
  45.          * node = node._nextSibling) if (!excludedBytes.contains(node._byte) &&
  46.          * ++numOutcomes > 1) return false; return numOutcomes == 1;
  47.          */
  48.     }
  49.  
  50.     /**
  51.      * Returns
  52.      * <code>true</code> if this node has no children, not counting specified
  53.      * exclusions.
  54.      *
  55.      * @param excludedBytes Bytes to exclude as children
  56.      * @return
  57.      * <code>true</code> if this node has no children, not counting
  58.      */
  59.     boolean isChildless(ByteSet excludedBytes) {
  60.         // not much faster and compresses less due to added escapes
  61.         for (PPMNode node = _firstChild;
  62.                 node != null;
  63.                 node = node._nextSibling) {
  64.             if (!excludedBytes.contains(node._byte)) {
  65.                 return false;
  66.             }
  67.         }
  68.         return true;
  69.     }
  70.  
  71.     /**
  72.      * Total count for this node, not including those bytes in the specified
  73.      * set.
  74.      *
  75.      * @param excludedBytes Set of bytes to exclude from counts.
  76.      * @return Total count for this node.
  77.      */
  78.     int totalCount(ByteSet excludedBytes) {
  79.         int count = _numberOfOutcomes;
  80.         for (PPMNode child = _firstChild;
  81.                 child != null;
  82.                 child = child._nextSibling) {
  83.             if (!excludedBytes.contains(child._byte)) {
  84.                 count += child._count;
  85.             }
  86.         }
  87.         return count;
  88.     }
  89.  
  90.     /**
  91.      * Calculates the interval for the specified byte from this node and writes
  92.      * it into the specified array.
  93.      *
  94.      * @param b Byte whose interval is calcuated.
  95.      * @param excludedBytes Set of bytes to exclude from counts.
  96.      * @param result Array in which to write the range for the specified byte.
  97.      */
  98.     void interval(int i, ByteSet excludedBytes, int[] result) {
  99.         interval(Converter.integerToByte(i), excludedBytes, result);
  100.     }
  101.  
  102.     /**
  103.      * Calculates the interval for the specified byte from this node and writes
  104.      * it into the specified array.
  105.      *
  106.      * @param b Byte whose interval is calcuated.
  107.      * @param excludedBytes Set of bytes to exclude from counts.
  108.      * @param result Array in which to write the range for the specified byte.
  109.      */
  110.     private void interval(byte b, ByteSet excludedBytes, int[] result) {
  111.         result[0] = 0;
  112.         for (PPMNode dtrNode = _firstChild;
  113.                 dtrNode != null;
  114.                 dtrNode = dtrNode._nextSibling) {
  115.             if (excludedBytes.contains(dtrNode._byte)) {
  116.                 continue;
  117.             }
  118.             if (dtrNode._byte == b) {
  119.                 result[1] = result[0] + dtrNode._count;
  120.                 result[2] = result[1] + _numberOfOutcomes;
  121.                 for (dtrNode = dtrNode._nextSibling;
  122.                         dtrNode != null;
  123.                         dtrNode = dtrNode._nextSibling) {
  124.                     if (!excludedBytes.contains(dtrNode._byte)) {
  125.                         result[2] += dtrNode._count;
  126.                     }
  127.                 }
  128.                 return;
  129.             }
  130.             result[0] += dtrNode._count;
  131.         }
  132.     }
  133.  
  134.     /**
  135.      * The interval for the escape count, less the set of excluded bytes.
  136.      *
  137.      * @param excludedBytes Set of bytes to exclude from counts.
  138.      * @param result Array into which to write the range for the specified
  139.      * bytes.
  140.      */
  141.     void intervalEscape(ByteSet excludedBytes, int[] result) {
  142.         result[2] = (result[1] = totalCount(excludedBytes));
  143.         result[0] = result[1] - _numberOfOutcomes;
  144.     }
  145.  
  146.     /**
  147.      * Increment the counts for this node for the string specified in the
  148.      * buffer.
  149.      *
  150.      * @param buffer Buffer of bytes from which to read event to increment.
  151.      */
  152.     void increment(ByteBuffer buffer) {
  153.         if (buffer.length() > 0) {
  154.             increment(buffer.bytes(), buffer.offset(), buffer.length());
  155.         }
  156.     }
  157.  
  158.     /**
  159.      * Returns
  160.      * <code>true</code> if this node has a child with the specified byte,
  161.      * specified as an integer.
  162.      *
  163.      * @param b Byte coded as integer to check.
  164.      * @return </code>true</code> if there is a child node with the specified
  165.      * byte.
  166.      */
  167.     boolean hasDaughter(int i) {
  168.         return hasDaughter(Converter.integerToByte(i));
  169.     }
  170.  
  171.     /**
  172.      * Returns
  173.      * <code>true</code> if this node has a child with the specified byte.
  174.      *
  175.      * @param b Byte to check.
  176.      * @return </code>true</code> if there is a child node with the specified
  177.      * byte.
  178.      */
  179.     private boolean hasDaughter(byte b) {
  180.         for (PPMNode dtrNode = _firstChild;
  181.                 dtrNode != null;
  182.                 dtrNode = dtrNode._nextSibling) {
  183.             if (dtrNode._byte == b) {
  184.                 return true;
  185.             }
  186.         }
  187.         return false;
  188.     }
  189.  
  190.     /**
  191.      * Retrieves the symbol for which the midCount is between its low and high
  192.      * counts (inclusive on low, exclusive on high).
  193.      *
  194.      * @param midCount Count for which to find symbol.
  195.      * @param excludedBytes Set of bytes to exclude from counts.
  196.      * @return Symbol with specified count.
  197.      */
  198.     int pointToSymbol(int midCount, ByteSet excludedBytes) {
  199.         int highCount = 0;
  200.         for (PPMNode child = _firstChild;
  201.                 child != null;
  202.                 child = child._nextSibling) {
  203.             if (excludedBytes.contains(child._byte)) {
  204.                 continue;
  205.             }
  206.             highCount += child._count;
  207.             if (highCount > midCount) {
  208.                 return Converter.byteToInteger(child._byte);
  209.             }
  210.         }
  211.         return ArithCodeModel.ESCAPE;
  212.     }
  213.  
  214.     /**
  215.      * Extends this node with the given sequence of bytes, specified by an
  216.      * array, offset and length.
  217.      *
  218.      * @param bytes Byte array providing bytes to extend.
  219.      * @param offset Index of first byte in array.
  220.      * @param length Number of bytes to extend.
  221.      */
  222.     void complete(byte[] bytes, int offset, int length) {
  223.         PPMNode node = this;
  224.         while (length > 0) {
  225.             ++node._numberOfOutcomes;
  226.             node = node._firstChild = new PPMNode(bytes[offset]);
  227.             ++offset;
  228.             --length;
  229.         }
  230.     }
  231.  
  232.     /**
  233.      * Increment the count of all of the nodes along the sequence of bytes
  234.      * determined by the specified array, beginning at the specified offset and
  235.      * continuing for the specified length number of bytes.
  236.      *
  237.      * @param bytes Array from which to read bytes.
  238.      * @param offset Index of first byte to read from array.
  239.      * @param length Total number of bytes to read from array.
  240.      */
  241.     void increment(byte[] bytes, int offset, int length) {
  242.         if (_firstChild == null) {
  243.             ++_numberOfOutcomes;
  244.             _firstChild = new PPMNode(bytes[offset]);
  245.             if (length > 1) {
  246.                 _firstChild.complete(bytes, offset + 1, length - 1);
  247.             }
  248.             return;
  249.         }
  250.         PPMNode previousChild = null;             // move to front                   
  251.         for (PPMNode child = _firstChild; true; child = child._nextSibling) {
  252.             if (child._byte == bytes[offset]) {
  253.                 if (length > 1) {
  254.                     child.increment(bytes, offset + 1, length - 1);
  255.                 }
  256.                 if (previousChild != null) {   // move to front
  257.                     previousChild._nextSibling = child._nextSibling;
  258.                     child._nextSibling = _firstChild;
  259.                     _firstChild = child;
  260.                 }
  261.                 if (++child._count > MAX_INDIVIDUAL_COUNT) {
  262.                     rescale();
  263.                 }
  264.                 return;
  265.             }
  266.             if (child._nextSibling == null) {
  267.                 ++_numberOfOutcomes;
  268.                 // start in front
  269.                 _firstChild = new PPMNode(bytes[offset], _firstChild);
  270.                 if (length > 1) {
  271.                     // start in front
  272.                     _firstChild.complete(bytes, offset + 1, length - 1);
  273.                 }
  274.                 return;
  275.             }
  276.             previousChild = child;
  277.         }
  278.     }
  279.     /**
  280.      * The byte for this node.
  281.      */
  282.     final byte _byte;
  283.     /**
  284.      * The scaled count for this node.
  285.      */
  286.     private short _count = 1;
  287.     /**
  288.      * The scaled number of outcomes used to calculate escape likelihoods.
  289.      */
  290.     private short _numberOfOutcomes; // implied = 0;
  291.     /**
  292.      * The first child of this node.
  293.      */
  294.     PPMNode _firstChild; // implied = null;
  295.     /**
  296.      * The next sibling of this node.
  297.      */
  298.     PPMNode _nextSibling; // implied = null;
  299.  
  300.     /**
  301.      * Rescale all of the counts of the children of this node. Divides by 2,
  302.      * rounding up, but eliminates all nodes that fall below count threshold.
  303.      * Total number of outcomes is also rescaled, but it will never fall below
  304.      * <code>1</code> to allow possiblity for escapes.
  305.      */
  306.     private void rescale() {
  307.         _numberOfOutcomes = (short) ((_numberOfOutcomes + 1) / 2);
  308.         _firstChild = _firstChild.rescaleSiblings();
  309.     }
  310.  
  311.     /**
  312.      * Rescale the counts on this node and the siblings of this node. Divides by
  313.      * 2, rounding up, so no count every drops below 1. Returns rescaled node,
  314.      * which may not be original sibling or may be
  315.      * <code>null</code> if siblings scale below
  316.      */
  317.     private PPMNode rescaleSiblings() {
  318.         _count >>= 1// cheap divide by 2
  319.         if (_nextSibling == null) {
  320.             return (_count < MIN_COUNT) ? null : this;
  321.         }
  322.         if (_count < MIN_COUNT) {
  323.             return _nextSibling.rescaleSiblings();
  324.         }
  325.         _nextSibling = _nextSibling.rescaleSiblings();
  326.         return this;
  327.     }
  328.     /**
  329.      * Minimum count for which to retain a node during rescaling. Surprisingly
  330.      * insensitive.
  331.      */
  332.     private static final int MIN_COUNT = 128;
  333.     /**
  334.      * Maximum count for daughter node before rescaling all daughters. Max value
  335.      * is 8K
  336.      */
  337.     private static final int MAX_INDIVIDUAL_COUNT = 8 * 1024;
  338. }

advertising

Update the Post

Either update this post and resubmit it with changes, or make a new post.

You may also comment on this post.

update paste below
details of the post (optional)

Note: Only the paste content is required, though the following information can be useful to others.

Save name / title?

(space separated, optional)



Please note that information posted here will expire by default in one month. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.

worth-right
fantasy-obligation