% Journal article by Ulman and Rzecki in Czasopismo Techniczne (2012).
% Author names are joined with " and " so that each person is parsed separately.
% NOTE(review): volume repeats the year and number holds a descriptive issue
% label rather than a plain integer -- confirm both against the publisher record.
@article{90cddb53-82f8-4e68-9531-7c65b021ce17,
  author   = {Ulman, Krzysztof and Rzecki, Krzysztof},
  title    = {Detection algorithm for content on {Internet} web portals},
  journal  = {Czasopismo Techniczne},
  volume   = {2012},
  number   = {Nauki Podstawowe Zeszyt 1-NP (18) 2012},
  year     = {2012},
  issn     = {0011-4561},
  pages    = {1--1},
  keywords = {web pages contents recognition; data mining; web scraping; data collection; web pages structure analysis; HTML},
  abstract = {The paper shows steps, made during designing and implementing automatic web pages contents recognition algorithm, based on HTML structure analysis. A web page contents is the article text with its headline, without any other text like menu, advertisements, user’s comments, image captions, etc.},
  doi      = {10.4467/2353737XCT.14.090.1867},
  url      = {https://ejournals.eu/czasopismo/czasopismo-techniczne/artykul/detection-algorithm-for-content-on-internet-web-portals},
}