I am web-scraping a few websites on Debian with Python 2.7, but maybe my code automatically stop (if it can't load in time (freeze) or there isn't an internet connection).
Is there any solution to solve this, and maybe just skip the problem and run my code to the next URL? Because if I get a problem like this, this script just stop automatically..
Here my code:
#!/usr/bin/python
#!/bin/sh
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
from selenium import webdriver
import urllib2
import subprocess
import unicodecsv as csv
import os
import sys
import io
import time
import datetime
import pandas as pd
import MySQLdb
import re
import contextlib
import selenium.webdriver.support.ui as ui
import numpy as np
from datetime import datetime, timedelta
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pyautogui
from pykeyboard import PyKeyboard
reload(sys)
sys.setdefaultencoding('utf-8')
cols = ['MYCOLS..']
browser = webdriver.Firefox()
datatable=[]
browser.get('LINK1')
time.sleep(5)
browser.find_element_by_xpath('//button[contains(text(), "CLICK EVENT")]').click()
time.sleep(5)
browser.find_element_by_xpath('//button[contains(text(), "CLICK EVENT")]').click()
html = browser.page_source
soup=BeautifulSoup(html,"html.parser")
table = soup.find('table', { "class" : "table table-condensed table-hover data-table m-n-t-15" })
for record in table.find_all('tr', class_="hidden-xs hidden-sm ng-scope"):
for data in record.find_all("td"):
temp_data.append(data.text.encode('utf-8'))
newlist = filter(None, temp_data)
datatable.append(newlist)
time.sleep(10)
browser.close()
#HERE I INSERT MY DATAES INTO MYSQL..IT IS NOT IMPORTANT, AND MY SECOND LINK STARTING HERE
browser = webdriver.Firefox()
datatable=[]
browser.get('LINK2')
browser.find_element_by_xpath('//button[contains(text(), "LCLICK EVENT")]').click()
time.sleep(5)
html = browser.page_source
soup=BeautifulSoup(html,"html.parser")
table = soup.find('table', { "class" : "table table-condensed table-hover data-table m-n-t-15" })
for record in table.find_all('tr', class_="hidden-xs hidden-sm ng-scope"):
for data in record.find_all("td"):
temp_data.append(data.text.encode('utf-8'))
newlist = filter(None, temp_data)
datatable.append(newlist)
time.sleep(10)
browser.close()
#MYSQLDB PART AGAIN...AND THE NEXT LINK IS COMING.
+1 EDIT:
When the script not find this CLICK EVENT stop too. Why? How can I avoid this?