from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import random
import _thread
from queue import Queue
import time
import pymysql


def storage(queue):
    """Consume scraped articles from *queue* and persist new ones in MySQL.

    Runs forever: each item taken from the queue is expected to be a dict
    with the keys ``"tytuł"`` (title) and ``"ścieżka"`` (path).  An article
    is inserted into the ``strony`` table only when no row with the same
    path exists yet.

    Args:
        queue: A ``queue.Queue``-like object whose ``get()`` blocks until an
            item is available.
    """
    conn = pymysql.connect(host='127.0.0.1', unix_socket='/tmp/mysql.sock', user='root', passwd='', db='mysql', charset='utf8')
    cur = conn.cursor()
    try:
        cur.execute('USE wiki_wątki')
        while True:
            # Blocking get() parks the thread until work arrives instead of
            # spinning on queue.empty() and burning CPU.
            article = queue.get()
            # Parameters are passed as a real one-element tuple.
            cur.execute('SELECT * FROM strony WHERE ścieżka = %s', (article["ścieżka"],))
            if cur.rowcount == 0:
                print("Zapisywanie artykułu {}".format(article["tytuł"]))
                cur.execute('INSERT INTO strony (tytuł, ścieżka) VALUES (%s, %s)', (article["tytuł"], article["ścieżka"]))
                conn.commit()
            else:
                print("Artykuł już istnieje: {}".format(article['tytuł']))
    finally:
        # The loop only exits through an exception; release DB resources then.
        cur.close()
        conn.close()

# Paths of articles that have already been requested by any scraper thread.
visited = []


def getLinks(thread_name, bsObj):
    """Return the not-yet-visited article links found in a parsed page.

    Looks inside the ``div`` with id ``bodyContent`` for ``<a>`` tags whose
    ``href`` starts with ``/wiki/`` and contains no colon.

    Args:
        thread_name: Label of the calling thread, used only for logging.
        bsObj: Parsed page exposing BeautifulSoup's ``find``/``find_all`` API.

    Returns:
        A list of link tags whose ``href`` is not in the module-level
        ``visited`` list; empty when the page has no ``bodyContent`` div.
    """
    print('Ekstrakcja odnośników w wątku: {}'.format(thread_name))
    body = bsObj.find('div', {'id':'bodyContent'})
    if body is None:
        return []
    links = body.find_all('a', href=re.compile(r'^(/wiki/)((?!:).)*$'))
    # `visited` stores path strings, so compare each tag's href rather than
    # the tag object itself (which would never match a stored path).
    return [link for link in links if link.attrs['href'] not in visited]

def scrape_article(thread_name, path, queue):
    """Random-walk Wikipedia starting at *path*, queueing each article seen.

    For every page: record the path in ``visited``, download and parse it,
    put ``{"tytuł": title, "ścieżka": path}`` on *queue*, then follow one
    randomly chosen link returned by ``getLinks``.  The walk ends when a
    page yields no links.

    Args:
        thread_name: Label of the calling thread, used only for logging.
        path: Site-relative article path, e.g. ``/wiki/Kevin_Bacon``.
        queue: Queue receiving one dict per scraped article.
    """
    # Iterate instead of recursing: the walk has no natural bound, so one
    # stack frame per article would eventually raise RecursionError.
    while True:
        visited.append(path)
        # Read the body inside `with` so the HTTP response is always closed.
        with urlopen('http://en.wikipedia.org{}'.format(path)) as response:
            html = response.read()
        time.sleep(5)
        bsObj = BeautifulSoup(html, 'html.parser')
        title = bsObj.find('h1').get_text()
        print('Dodano {} do przechowania w wątku {}'.format(title, thread_name))
        queue.put({"tytuł":title, "ścieżka":path})
        links = getLinks(thread_name, bsObj)
        if not links:
            return
        path = random.choice(links).attrs['href']

# Shared hand-off between the two scraper threads and the storage thread.
queue = Queue()
try:
    _thread.start_new_thread(scrape_article, ('Wątek 1', '/wiki/Kevin_Bacon', queue,))
    _thread.start_new_thread(scrape_article, ('Wątek 2', '/wiki/Monty_Python', queue,))
    _thread.start_new_thread(storage, (queue,))
except RuntimeError:
    # start_new_thread signals failure with RuntimeError; a bare `except`
    # would also swallow KeyboardInterrupt and SystemExit.
    print ('Błąd: nie można rozpocząć wątków')

# Threads started via _thread do not keep the process alive, so the main
# thread must not exit.  Sleep rather than spin so it does not burn CPU or
# compete with the workers.
while True:
    time.sleep(1)