Overload mathDB methods (insertPerson, getPerson) with an optional connection parameter so an existing connection object can be passed in

This commit is contained in:
2023-10-06 11:32:57 +02:00
parent e9a446d027
commit 94a3dc862b

View File

@@ -22,9 +22,13 @@ class mathPage:
self.diss = None self.diss = None
self.advisorID = [None, None, None] self.advisorID = [None, None, None]
def getEntry(self):
    """Return this page's fields as one flat 9-tuple.

    The order is id, name, title, inst, year, diss, followed by the
    first three entries of ``advisorID``. It is the same tuple that
    ``insertPerson`` binds to its nine-placeholder INSERT statement.

    :return: tuple of nine values
    """
    scalar_fields = (
        self.id,
        self.name,
        self.title,
        self.inst,
        self.year,
        self.diss,
    )
    # Exactly three advisor slots are emitted, read by index 0, 1, 2.
    advisor_fields = tuple(self.advisorID[slot] for slot in range(3))
    return scalar_fields + advisor_fields
def getInfo(self): def getInfo(self):
temp = requests.get(self.url) temp = requests.get(self.url)
temp.encoding = "utf-8" temp.encoding = "utf-8"
@@ -38,13 +42,17 @@ class mathPage:
else: else:
self.tree = html.fromstring(self.page) self.tree = html.fromstring(self.page)
self.parsePage() self.parsePage()
def checkPage(self):
    """
    Check if the page exists before parsing to avoid errors.

    Not implemented yet: the method currently does nothing and
    returns None.
    """
    return None
def parsePage(self): def parsePage(self):
name1 = self.tree.xpath('//*[@id="paddingWrapper"]/h2/text()') name1 = self.tree.xpath('//*[@id="paddingWrapper"]/h2/text()')
self.name = str(name1[0]).replace(" "," ").strip() self.name = str(name1[0]).replace(" "," ").strip()
@@ -92,12 +100,8 @@ class mathPage:
# print(s) ### Debugging # print(s) ### Debugging
advisor.append(str(s).replace(" "," ")) advisor.append(str(s).replace(" "," "))
#---------------------------------------- #----------------------------------------
self.advisor = advisor self.advisor = advisor
advisorID1 = self.tree.xpath('//*[@id="paddingWrapper"]/p[2]/a/@href') advisorID1 = self.tree.xpath('//*[@id="paddingWrapper"]/p[2]/a/@href')
advisorID = list() advisorID = list()
for s in advisorID1: for s in advisorID1:
@@ -107,7 +111,6 @@ class mathPage:
if "Chrono" not in s: if "Chrono" not in s:
advisorID.append(int(str(s)[10:])) advisorID.append(int(str(s)[10:]))
if len(advisorID) == 0: if len(advisorID) == 0:
advisorID.append(0) advisorID.append(0)
if len(advisorID) == 1: if len(advisorID) == 1:
@@ -135,11 +138,16 @@ class mathPage:
studentsYear.append(s) studentsYear.append(s)
self.studentsYear = studentsYear self.studentsYear = studentsYear
class mathDB: class mathDB:
def __init__(self, db_file):
    """Store the database file and run the database initialisation.

    :param db_file: kept on the instance as ``db_file``; other methods
        hand it to ``sqlite3.connect``
    """
    self.db_file = db_file
    # initDB() runs immediately, after db_file has been stored.
    self.initDB()
def createConnection(self): def createConnection(self):
""" create a database connection to a SQLite database """ """ create a database connection to a SQLite database """
conn = None conn = None
@@ -152,6 +160,8 @@ class mathDB:
if conn: if conn:
return(conn) return(conn)
def createTable(self, conn, create_table_sql): def createTable(self, conn, create_table_sql):
""" create a table from the create_table_sql statement """ create a table from the create_table_sql statement
:param conn: Connection object :param conn: Connection object
@@ -185,10 +195,15 @@ class mathDB:
else: else:
print("Cannot create database connection") print("Cannot create database connection")
def insertPerson(self, mathPage):
conn = self.createConnection()
def insertPerson(self, mathPage, connection=None):
if connection == None:
conn = self.createConnection()
else:
conn = connection
cur = conn.cursor() cur = conn.cursor()
row = self.getPerson(mathPage.id) row = self.getPerson(mathPage.id, conn)
if len(row) == 0: if len(row) == 0:
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.getEntry()) cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.getEntry())
else: else:
@@ -205,8 +220,11 @@ class mathDB:
WHERE id = ? WHERE id = ?
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.id)) """, (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.id))
conn.commit() conn.commit()
conn.close() if connection == None:
conn.close()
def findMissing(self, limit): def findMissing(self, limit):
""" """
Find missing entries in database. Find missing entries in database.
@@ -234,6 +252,8 @@ class mathDB:
conn.close() conn.close()
return(missing) return(missing)
def exists(self, id): def exists(self, id):
conn = sqlite3.connect(self.db_file) conn = sqlite3.connect(self.db_file)
cur = conn.cursor() cur = conn.cursor()
@@ -245,11 +265,15 @@ class mathDB:
else: else:
return(False) return(False)
def fetchMissing(self, limit):
    """Look up the missing entries for ``limit`` and add them to the database.

    :param limit: forwarded unchanged to ``findMissing``
    :return: None
    """
    gaps = self.findMissing(limit)
    # Nothing missing: leave the database untouched.
    if len(gaps) <= 0:
        return
    self.populateDB(gaps)
def checkMissingData(self, id): def checkMissingData(self, id):
conn = sqlite3.connect(self.db_file) conn = sqlite3.connect(self.db_file)
cur = conn.cursor() cur = conn.cursor()
@@ -265,16 +289,19 @@ class mathDB:
return(data) return(data)
# print(data) # print(data)
def getPerson(self, id, connection=None):
    """Fetch all rows of the ``mathematicians`` table with the given id.

    :param id: value matched against the ``id`` column
    :param connection: optional open connection to reuse. When omitted,
        a connection is obtained from ``self.createConnection()`` and
        closed again before returning.
    :return: list of matching rows; empty when no row matches
    """
    # Only a connection opened here is ours to close. A caller-supplied
    # connection must be used as-is and left open for the caller.
    owns_connection = connection is None
    conn = self.createConnection() if owns_connection else connection
    try:
        cur = conn.cursor()
        # Bind the id as a parameter rather than formatting it into the SQL.
        cur.execute("SELECT * FROM mathematicians WHERE id = ?", (id,))
        return cur.fetchall()
    finally:
        if owns_connection:
            conn.close()
# def _makeLimitIterable(self, limit): # def _makeLimitIterable(self, limit):
@@ -289,6 +316,7 @@ class mathDB:
def populateDB(self, limit, chunk = 10): def populateDB(self, limit, chunk = 10):
conn = sqlite3.connect(self.db_file)
if type(limit) == int: if type(limit) == int:
limit = range(1, limit+1) limit = range(1, limit+1)
try: try:
@@ -312,7 +340,7 @@ class mathDB:
for j in range(chunk): for j in range(chunk):
print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r') print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r')
self.insertPerson(persons[j]) self.insertPerson(persons[j], conn)
threads = list() threads = list()
persons = list() persons = list()
@@ -328,18 +356,24 @@ class mathDB:
for j in range(r): for j in range(r):
print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r') print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r')
self.insertPerson(persons[j]) self.insertPerson(persons[j], conn)
# print(f"Downloading entry {i} of {limit}", end= '\r') # print(f"Downloading entry {i} of {limit}", end= '\r')
# page = mathPage(i) # page = mathPage(i)
# self.insertPerson(page) # self.insertPerson(page)
conn.close()
class mathGenealogy:
    """Placeholder class; it defines no attributes or methods yet."""
total_entries = 297377 # number of records as of 2 October 2023
total_entries = 297377 # number of records as of 2 October 2023
if __name__ == "__main__": if __name__ == "__main__":
testDB = mathDB("test.db") testDB = mathDB("test.db")
test = mathPage(0) test = mathPage(0)