Advertising
- Miscellany
- Wednesday, May 23rd, 2012 at 4:54:03pm MDT
- package TDC.Compression.Arithmetic;
- /**
- *
- * @author Inna Tuzhikova
- */
- final class PPMNode {
- /**
- * Construct a node with the specified byte and next sibling.
- *
- * @param b Byte represented by node.
- * @param nextSibling The next daughter node in the list of daughters.
- */
- PPMNode(byte b, PPMNode nextSibling) {
- _byte = b;
- _nextSibling = nextSibling;
- }
- /**
- * Construct a node with the specified byte.
- *
- * @param b Byte represented by node.
- */
- PPMNode(byte b) {
- this(b, null);
- }
- /**
- * Returns
- * <code>true</code> if the number of children for this node is
- * <code>1</code>.
- *
- * @param bytes Bytes that have been seen in escaped context that should not
- * be considered children.
- * @return
- * <code>true</code> if the scaled number of outcomes for this node is
- * <code>1</code>.
- */
- boolean isDeterministic(ByteSet excludedBytes) {
- return _firstChild._nextSibling == null;
- /*
- * doing it right is about 10-12% slower and less than .01 b/B better
- * int numOutcomes = 0; for (PPMNode node = _firstChild; node != null;
- * node = node._nextSibling) if (!excludedBytes.contains(node._byte) &&
- * ++numOutcomes > 1) return false; return numOutcomes == 1;
- */
- }
- /**
- * Returns
- * <code>true</code> if this node has no children, not counting specified
- * exclusions.
- *
- * @param excludedBytes Bytes to exclude as children
- * @return
- * <code>true</code> if this node has no children, not counting
- */
- boolean isChildless(ByteSet excludedBytes) {
- // not much faster and compresses less due to added escapes
- for (PPMNode node = _firstChild;
- node != null;
- node = node._nextSibling) {
- if (!excludedBytes.contains(node._byte)) {
- return false;
- }
- }
- return true;
- }
- /**
- * Total count for this node, not including those bytes in the specified
- * set.
- *
- * @param excludedBytes Set of bytes to exclude from counts.
- * @return Total count for this node.
- */
- int totalCount(ByteSet excludedBytes) {
- int count = _numberOfOutcomes;
- for (PPMNode child = _firstChild;
- child != null;
- child = child._nextSibling) {
- if (!excludedBytes.contains(child._byte)) {
- count += child._count;
- }
- }
- return count;
- }
- /**
- * Calculates the interval for the specified byte from this node and writes
- * it into the specified array.
- *
- * @param b Byte whose interval is calcuated.
- * @param excludedBytes Set of bytes to exclude from counts.
- * @param result Array in which to write the range for the specified byte.
- */
- void interval(int i, ByteSet excludedBytes, int[] result) {
- interval(Converter.integerToByte(i), excludedBytes, result);
- }
- /**
- * Calculates the interval for the specified byte from this node and writes
- * it into the specified array.
- *
- * @param b Byte whose interval is calcuated.
- * @param excludedBytes Set of bytes to exclude from counts.
- * @param result Array in which to write the range for the specified byte.
- */
- private void interval(byte b, ByteSet excludedBytes, int[] result) {
- result[0] = 0;
- for (PPMNode dtrNode = _firstChild;
- dtrNode != null;
- dtrNode = dtrNode._nextSibling) {
- if (excludedBytes.contains(dtrNode._byte)) {
- continue;
- }
- if (dtrNode._byte == b) {
- result[1] = result[0] + dtrNode._count;
- result[2] = result[1] + _numberOfOutcomes;
- for (dtrNode = dtrNode._nextSibling;
- dtrNode != null;
- dtrNode = dtrNode._nextSibling) {
- if (!excludedBytes.contains(dtrNode._byte)) {
- result[2] += dtrNode._count;
- }
- }
- return;
- }
- result[0] += dtrNode._count;
- }
- }
- /**
- * The interval for the escape count, less the set of excluded bytes.
- *
- * @param excludedBytes Set of bytes to exclude from counts.
- * @param result Array into which to write the range for the specified
- * bytes.
- */
- void intervalEscape(ByteSet excludedBytes, int[] result) {
- result[2] = (result[1] = totalCount(excludedBytes));
- result[0] = result[1] - _numberOfOutcomes;
- }
- /**
- * Increment the counts for this node for the string specified in the
- * buffer.
- *
- * @param buffer Buffer of bytes from which to read event to increment.
- */
- void increment(ByteBuffer buffer) {
- if (buffer.length() > 0) {
- increment(buffer.bytes(), buffer.offset(), buffer.length());
- }
- }
- /**
- * Returns
- * <code>true</code> if this node has a child with the specified byte,
- * specified as an integer.
- *
- * @param b Byte coded as integer to check.
- * @return </code>true</code> if there is a child node with the specified
- * byte.
- */
- boolean hasDaughter(int i) {
- return hasDaughter(Converter.integerToByte(i));
- }
- /**
- * Returns
- * <code>true</code> if this node has a child with the specified byte.
- *
- * @param b Byte to check.
- * @return </code>true</code> if there is a child node with the specified
- * byte.
- */
- private boolean hasDaughter(byte b) {
- for (PPMNode dtrNode = _firstChild;
- dtrNode != null;
- dtrNode = dtrNode._nextSibling) {
- if (dtrNode._byte == b) {
- return true;
- }
- }
- return false;
- }
- /**
- * Retrieves the symbol for which the midCount is between its low and high
- * counts (inclusive on low, exclusive on high).
- *
- * @param midCount Count for which to find symbol.
- * @param excludedBytes Set of bytes to exclude from counts.
- * @return Symbol with specified count.
- */
- int pointToSymbol(int midCount, ByteSet excludedBytes) {
- int highCount = 0;
- for (PPMNode child = _firstChild;
- child != null;
- child = child._nextSibling) {
- if (excludedBytes.contains(child._byte)) {
- continue;
- }
- highCount += child._count;
- if (highCount > midCount) {
- return Converter.byteToInteger(child._byte);
- }
- }
- return ArithCodeModel.ESCAPE;
- }
- /**
- * Extends this node with the given sequence of bytes, specified by an
- * array, offset and length.
- *
- * @param bytes Byte array providing bytes to extend.
- * @param offset Index of first byte in array.
- * @param length Number of bytes to extend.
- */
- void complete(byte[] bytes, int offset, int length) {
- PPMNode node = this;
- while (length > 0) {
- ++node._numberOfOutcomes;
- node = node._firstChild = new PPMNode(bytes[offset]);
- ++offset;
- --length;
- }
- }
- /**
- * Increment the count of all of the nodes along the sequence of bytes
- * determined by the specified array, beginning at the specified offset and
- * continuing for the specified length number of bytes.
- *
- * @param bytes Array from which to read bytes.
- * @param offset Index of first byte to read from array.
- * @param length Total number of bytes to read from array.
- */
- void increment(byte[] bytes, int offset, int length) {
- if (_firstChild == null) {
- ++_numberOfOutcomes;
- _firstChild = new PPMNode(bytes[offset]);
- if (length > 1) {
- _firstChild.complete(bytes, offset + 1, length - 1);
- }
- return;
- }
- PPMNode previousChild = null; // move to front
- for (PPMNode child = _firstChild; true; child = child._nextSibling) {
- if (child._byte == bytes[offset]) {
- if (length > 1) {
- child.increment(bytes, offset + 1, length - 1);
- }
- if (previousChild != null) { // move to front
- previousChild._nextSibling = child._nextSibling;
- child._nextSibling = _firstChild;
- _firstChild = child;
- }
- if (++child._count > MAX_INDIVIDUAL_COUNT) {
- rescale();
- }
- return;
- }
- if (child._nextSibling == null) {
- ++_numberOfOutcomes;
- // start in front
- _firstChild = new PPMNode(bytes[offset], _firstChild);
- if (length > 1) {
- // start in front
- _firstChild.complete(bytes, offset + 1, length - 1);
- }
- return;
- }
- previousChild = child;
- }
- }
- /**
- * The byte for this node.
- */
- final byte _byte;
- /**
- * The scaled count for this node.
- */
- private short _count = 1;
- /**
- * The scaled number of outcomes used to calculate escape likelihoods.
- */
- private short _numberOfOutcomes; // implied = 0;
- /**
- * The first child of this node.
- */
- PPMNode _firstChild; // implied = null;
- /**
- * The next sibling of this node.
- */
- PPMNode _nextSibling; // implied = null;
- /**
- * Rescale all of the counts of the children of this node. Divides by 2,
- * rounding up, but eliminates all nodes that fall below count threshold.
- * Total number of outcomes is also rescaled, but it will never fall below
- * <code>1</code> to allow possiblity for escapes.
- */
- private void rescale() {
- _numberOfOutcomes = (short) ((_numberOfOutcomes + 1) / 2);
- _firstChild = _firstChild.rescaleSiblings();
- }
- /**
- * Rescale the counts on this node and the siblings of this node. Divides by
- * 2, rounding up, so no count every drops below 1. Returns rescaled node,
- * which may not be original sibling or may be
- * <code>null</code> if siblings scale below
- */
- private PPMNode rescaleSiblings() {
- _count >>= 1; // cheap divide by 2
- if (_nextSibling == null) {
- return (_count < MIN_COUNT) ? null : this;
- }
- if (_count < MIN_COUNT) {
- return _nextSibling.rescaleSiblings();
- }
- _nextSibling = _nextSibling.rescaleSiblings();
- return this;
- }
- /**
- * Minimum count for which to retain a node during rescaling. Surprisingly
- * insensitive.
- */
- private static final int MIN_COUNT = 128;
- /**
- * Maximum count for daughter node before rescaling all daughters. Max value
- * is 8K
- */
- private static final int MAX_INDIVIDUAL_COUNT = 8 * 1024;
- }
advertising
Update the Post
Either update this post and resubmit it with changes, or make a new post.
You may also comment on this post.
Please note that information posted here will expire by default in one month. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.