% Journal article: Dębowski, "Hilberg’s Conjecture – a Challenge for Machine Learning",
% Schedae Informaticae, pages 33--44.
% NOTE(review): the exported record had volume = 2014 and number = "Volume 23".
% The volume is recorded here as 23; 2014 is presumably the nominal year of that
% volume (the year field says 2015) -- confirm against the journal before citing.
@article{24d4acd4-24cc-4cc1-9139-30562a6b6296,
  author   = {Dębowski, Łukasz},
  title    = {{Hilberg’s} Conjecture – a Challenge for Machine Learning},
  journal  = {Schedae Informaticae},
  volume   = {23},
  pages    = {33--44},
  year     = {2015},
  issn     = {1732-3916},
  doi      = {10.4467/20838476SI.14.003.3020},
  url      = {https://ejournals.eu/en/journal/schedae-informaticae/article/hilbergs-conjecture-a-challenge-for-machine-learning},
  keywords = {statistical language modeling; Hilberg’s conjecture; maximal repetition; grammar-based codes; Santa Fe processes},
  abstract = {We review three mathematical developments linked with Hilberg’s conjecture – a hypothesis about the power-law growth of entropy of texts in natural language, which sets up a challenge for machine learning. First, considerations concerning maximal repetition indicate that universal codes such as the Lempel-Ziv code may fail to efficiently compress sources that satisfy Hilberg’s conjecture. Second, Hilberg’s conjecture implies the empirically observed power-law growth of vocabulary in texts. Third, Hilberg’s conjecture can be explained by a hypothesis that texts describe consistently an infinite random object.},
}