#!/usr/bin/env python

import countmemaybe
import numpy as np
import pylab as py


def leading_set_bit(number):
    """Return the 1-based position of the lowest set bit of *number*.

    Positions are counted from the least-significant end of the binary
    representation, so 1 -> 1, 2 -> 2, 12 (0b1100) -> 3.  Despite the
    function's name, this is the rightmost "1", not the most significant one.

    Returns 0 when *number* has no set bit at all (i.e. it is 0), following
    the convention of the C ``ffs`` function.
    """
    number_binary = bin(number)
    last_one = number_binary.rfind("1")
    if last_one == -1:
        # No "1" digit in the string: without this guard the -1 sentinel
        # would be subtracted and yield a meaningless len(...) + 1.
        return 0
    return len(number_binary) - last_one


class HLL(object):
    """Single-register cardinality estimator.

    Keeps only the largest value that ``leading_set_bit`` has reported for
    any number added so far, and reports ``2 ** max_index`` as its size.
    """

    # Largest bit position observed so far; instances shadow this class-level
    # default with their own attribute on the first call to add().
    max_index = 0

    def add(self, number):
        """Record *number*, raising ``max_index`` if its bit position is larger."""
        observed = leading_set_bit(number)
        previous = self.max_index
        self.max_index = previous if previous >= observed else observed

    def __len__(self):
        """Return the size estimate, two raised to the power ``max_index``."""
        return pow(2, self.max_index)


if __name__ == "__main__":
    # Feed 100000 distinct strings to both estimators and record, after every
    # insertion, the true count next to each estimator's reported size.
    data_list = []
    h1 = HLL()
    h = countmemaybe.HyperLogLog()
    for i in range(100000):
        item = "seee%seeeed234rsdaf" % i
        # Both estimators are given the same value: the result of the library
        # object's own hash function applied to the item.
        x = h._hash(item)
        h1.add(x)
        h.add(x)
        # Row layout: (true size, single-register estimate, library estimate).
        data_list.append((i + 1, len(h1), len(h)))

    data_numpy = np.asarray(data_list)
    # Column 0 is the x axis for every curve; plotting it against itself gives
    # the reference line for the actual set size.
    py.plot(data_numpy[:, 0], data_numpy[:, 1], ":", label="Pojedynczy rejestr HLL")
    py.plot(data_numpy[:, 0], data_numpy[:, 2], "--", label="HLL z 16 rejestrami")
    py.plot(data_numpy[:, 0], data_numpy[:, 0], label="Rzeczywisty rozmiar")
    py.legend(loc="upper left")

    py.title("Wydajność pojedynczego rejestru HLL")
    py.xlabel("Wielkość zbioru")
    py.ylabel("Przewidywana wielkość zbioru")

    # The figure is written to disk; call py.show() here instead to view it
    # interactively.
    py.savefig("../hll_single_reg.png")
