diff --git a/src/main/java/net/agkn/hll/HLL.java b/src/main/java/net/agkn/hll/HLL.java
index cdfd3ad..7741a37 100644
--- a/src/main/java/net/agkn/hll/HLL.java
+++ b/src/main/java/net/agkn/hll/HLL.java
@@ -16,6 +16,7 @@
* limitations under the License.
*/
+import java.io.Serializable;
import java.util.Arrays;
import it.unimi.dsi.fastutil.ints.Int2ByteOpenHashMap;
@@ -34,12 +35,12 @@
/**
* A probabilistic set of hashed {@code long} elements. Useful for computing
- * the approximate cardinality of a stream of data in very small storage.
+ * the approximate cardinality of a stream of data in very small storage.
*
* A modified version of the
* 'HyperLogLog' data structure and algorithm is used, which combines both
* probabilistic and non-probabilistic techniques to improve the accuracy and
- * storage requirements of the original algorithm.
+ * storage requirements of the original algorithm.
*
* More specifically, initializing and storing a new {@link HLL} will
* allocate a sentinel value symbolizing the empty set ({@link HLLType#EMPTY}).
@@ -48,7 +49,7 @@
* be sacrificed for memory footprint: the values in the sorted list are
* "promoted" to a "{@link HLLType#SPARSE}" map-based HyperLogLog structure.
* Finally, when enough registers are set, the map-based HLL will be converted
- * to a bit-packed "{@link HLLType#FULL}" HyperLogLog structure.
+ * to a bit-packed "{@link HLLType#FULL}" HyperLogLog structure.
*
* This data structure is interoperable with the implementations found at:
*
@@ -59,7 +60,7 @@
*
* @author timon
*/
-public class HLL implements Cloneable {
+public class HLL implements Cloneable, Serializable {
// minimum and maximum values for the log-base-2 of the number of registers
// in the HLL
public static final int MINIMUM_LOG2M_PARAM = 4;
@@ -156,7 +157,7 @@ public class HLL implements Cloneable {
* @param expthresh tunes when the {@link HLLType#EXPLICIT} to
* {@link HLLType#SPARSE} promotion occurs,
* based on the set's cardinality. Must be at least -1 and at most 18.
- *
+ *
* expthresh value | Meaning |
*
*
@@ -238,7 +239,7 @@ public HLL(final int log2m, final int regwidth, final int expthresh, final boole
}
/**
- * Construct an empty HLL with the given {@code log2m} and {@code regwidth}.
+ * Construct an empty HLL with the given {@code log2m} and {@code regwidth}.
*
* This is equivalent to calling {@code HLL(log2m, regwidth, -1, true, HLLType.EMPTY)}.
*
@@ -596,7 +597,7 @@ public long cardinality() {
// Clear
/**
* Clears the HLL. The HLL will have cardinality zero and will act as if no
- * elements have been added.
+ * elements have been added.
*
* NOTE: Unlike {@link #addRaw(long)}, {@code clear} does NOT handle
* transitions between {@link HLLType}s - a probabilistic type will remain
@@ -938,7 +939,7 @@ public byte[] toBytes(final ISchemaVersion schemaVersion) {
/**
* Deserializes the HLL (in {@link #toBytes(ISchemaVersion)} format) serialized
- * into {@code bytes}.
+ * into {@code bytes}.
*
* @param bytes the serialized bytes of new HLL
* @return the deserialized HLL. This will never be {@code null}.
diff --git a/src/main/java/net/agkn/hll/serialization/BigEndianAscendingWordSerializer.java b/src/main/java/net/agkn/hll/serialization/BigEndianAscendingWordSerializer.java
index 3ad7ce1..5b713ec 100644
--- a/src/main/java/net/agkn/hll/serialization/BigEndianAscendingWordSerializer.java
+++ b/src/main/java/net/agkn/hll/serialization/BigEndianAscendingWordSerializer.java
@@ -22,13 +22,13 @@
* a low bit in a byte. However, a high byte in a word is written at a lower index
* in the array than a low byte in a word. The first word is written at the lowest
* array index. Each serializer is one time use and returns its backing byte
- * array.
+ * array.
*
* This encoding was chosen so that when reading bytes as octets in the typical
* first-octet-is-the-high-nibble fashion, an octet-to-binary conversion
- * would yield a high-to-low, left-to-right view of the "short words".
+ * would yield a high-to-low, left-to-right view of the "short words".
*
- * Example:
+ * Example:
*
* Say short words are 5 bits wide. Our word sequence is the values
* {@code [31, 1, 5]}. In big-endian binary format, the values are
diff --git a/src/main/java/net/agkn/hll/serialization/IWordDeserializer.java b/src/main/java/net/agkn/hll/serialization/IWordDeserializer.java
index e11fd7b..e8ab12b 100644
--- a/src/main/java/net/agkn/hll/serialization/IWordDeserializer.java
+++ b/src/main/java/net/agkn/hll/serialization/IWordDeserializer.java
@@ -29,7 +29,7 @@ public interface IWordDeserializer {
long readWord();
/**
- * Returns the number of words that could be encoded in the sequence.
+ * Returns the number of words that could be encoded in the sequence.
*
* NOTE: the sequence that was encoded may be shorter than the value this
* method returns due to padding issues within bytes. This guarantees
@@ -39,4 +39,4 @@ public interface IWordDeserializer {
* @return the maximum number of words that could be read from the sequence.
*/
int totalWordCount();
-}
\ No newline at end of file
+}
diff --git a/src/main/java/net/agkn/hll/serialization/SerializationUtil.java b/src/main/java/net/agkn/hll/serialization/SerializationUtil.java
index d03c4cf..f0b7e23 100644
--- a/src/main/java/net/agkn/hll/serialization/SerializationUtil.java
+++ b/src/main/java/net/agkn/hll/serialization/SerializationUtil.java
@@ -91,7 +91,7 @@ public class SerializationUtil {
* List of registered schema versions, indexed by their version numbers. If
* an entry is {@code null}, then no such schema version is registered.
* Similarly, registering a new schema version simply entails assigning an
- * {@link ISchemaVersion} instance to the appropriate index of this array.
+ * {@link ISchemaVersion} instance to the appropriate index of this array.
*
* By default, only {@link SchemaVersionOne} is registered. Note that version
* zero will always be reserved for internal (e.g. proprietary, legacy) schema
@@ -172,7 +172,7 @@ public static byte packVersionByte(final int schemaVersion, final int typeOrdina
* If 'auto' is chosen, this value should be {@code 63}.
*
* -
- * If a cutoff of 2<sup>n</sup> is desired, for {@code 0 <= n < 31},
+ * If a cutoff of 2<sup>n</sup> is desired, for {@code 0 <= n < 31},
* this value should be {@code n + 1}.
*
*
@@ -190,7 +190,7 @@ public static byte packCutoffByte(final int explicitCutoff, final boolean sparse
/**
* Generates a byte that encodes the parameters of a
* {@link HLLType#FULL} or {@link HLLType#SPARSE}
- * HLL.
+ * HLL.
*
* The top 3 bits are used to encode {@code registerWidth - 1}
* (range of {@code registerWidth} is thus 1-9) and the bottom 5
diff --git a/src/main/java/net/agkn/hll/util/BitVector.java b/src/main/java/net/agkn/hll/util/BitVector.java
index 56687b1..11caf31 100644
--- a/src/main/java/net/agkn/hll/util/BitVector.java
+++ b/src/main/java/net/agkn/hll/util/BitVector.java
@@ -18,6 +18,8 @@
import net.agkn.hll.serialization.IWordSerializer;
+import java.io.Serializable;
+
/**
* A vector (array) of bits that is accessed in units ("registers") of width
* bits which are stored as 64bit "words" ({@code long}s). In this context
@@ -25,7 +27,7 @@
*
* @author rgrzywinski
*/
-public class BitVector implements Cloneable {
+public class BitVector implements Cloneable, Serializable {
// NOTE: in this context, a word is 64bits
// rather than doing division to determine how a bit index fits into 64bit
@@ -172,7 +174,7 @@ public LongIterator registerIterator() {
/**
* Sets the value of the specified index register if and only if the specified
* value is greater than the current value in the register. This is equivalent
- * to but much more performant than:
+ * to but much more performant than:
*
* vector.setRegister(index, Math.max(vector.getRegister(index), value));
*
@@ -259,4 +261,4 @@ public BitVector clone() {
System.arraycopy(words, 0, copy.words, 0, words.length);
return copy;
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/net/agkn/hll/util/HLLUtil.java b/src/main/java/net/agkn/hll/util/HLLUtil.java
index d3f97f2..cba9425 100644
--- a/src/main/java/net/agkn/hll/util/HLLUtil.java
+++ b/src/main/java/net/agkn/hll/util/HLLUtil.java
@@ -148,7 +148,7 @@ public static long pwMaxMask(final int registerSizeInBits) {
* The cutoff for using the "small range correction" formula, in the
* HyperLogLog algorithm.
*
- * @param m the number of registers in the HLL. m in the paper.
+ * @param m the number of registers in the HLL. m in the paper.
* @return the cutoff for the small range correction.
* @see #smallEstimator(int, int)
*/
@@ -161,7 +161,7 @@ public static double smallEstimatorCutoff(final int m) {
* appropriate if both the estimator is smaller than {@code (5/2) * m} and
* there are still registers that have the zero value.
*
- * @param m the number of registers in the HLL. m in the paper.
+ * @param m the number of registers in the HLL. m in the paper.
* @param numberOfZeroes the number of registers with value zero. V
* in the paper.
* @return a corrected cardinality estimate.
@@ -174,7 +174,7 @@ public static double smallEstimator(final int m, final int numberOfZeroes) {
* The cutoff for using the "large range correction" formula, from the
* HyperLogLog algorithm, adapted for 64 bit hashes.
*
- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper.
+ * @param log2m log-base-2 of the number of registers in the HLL. b in the paper.
* @param registerSizeInBits the size of the HLL registers, in bits.
* @return the cutoff for the large range correction.
* @see #largeEstimator(int, int, double)
@@ -189,7 +189,7 @@ public static double largeEstimatorCutoff(final int log2m, final int registerSiz
* for 64 bit hashes. Only appropriate for estimators whose value exceeds
* the return of {@link #largeEstimatorCutoff(int, int)}.
*
- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper.
+ * @param log2m log-base-2 of the number of registers in the HLL. b in the paper.
* @param registerSizeInBits the size of the HLL registers, in bits.
* @param estimator the original estimator ("E" in the paper).
* @return a corrected cardinality estimate.