% Journal article by Ulman and Rzecki (Czasopismo Techniczne, 2012) on
% recognising the main content of web pages through HTML structure analysis.
% Authors are separated by " and " and given in "Last, First" form so that
% BibTeX parses them as two distinct people.
@article{90cddb53-82f8-4e68-9531-7c65b021ce17,
  author   = {Ulman, Krzysztof and Rzecki, Krzysztof},
  title    = {Detection algorithm for content on {Internet} web portals},
  journal  = {Czasopismo Techniczne},
  volume   = {2012},
  number   = {Nauki Podstawowe Zeszyt 1-NP (18) 2012},
  year     = {2012},
  issn     = {0011-4561},
  pages    = {1--1},
  keywords = {web pages contents recognition; data mining; web scraping; data collection; web pages structure analysis; HTML},
  abstract = {The paper shows steps, made during designing and implementing automatic web pages contents recognition algorithm, based on HTML structure analysis. A web page contents is the article text with its headline, without any other text like menu, advertisements, user's comments, image captions, etc.},
  doi      = {10.4467/2353737XCT.14.090.1867},
  url      = {https://ejournals.eu/czasopismo/czasopismo-techniczne/artykul/detection-algorithm-for-content-on-internet-web-portals},
}