overload methods of mathDB to allow passing of connection object
This commit is contained in:
72
MathGen.py
72
MathGen.py
@@ -22,9 +22,13 @@ class mathPage:
|
|||||||
self.diss = None
|
self.diss = None
|
||||||
self.advisorID = [None, None, None]
|
self.advisorID = [None, None, None]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def getEntry(self):
    """
    Return this record as a flat 9-tuple.

    Order: id, name, title, inst, year, diss, followed by the first three
    entries of ``self.advisorID``.
    """
    first_advisor = self.advisorID[0]
    second_advisor = self.advisorID[1]
    third_advisor = self.advisorID[2]
    return (
        self.id,
        self.name,
        self.title,
        self.inst,
        self.year,
        self.diss,
        first_advisor,
        second_advisor,
        third_advisor,
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def getInfo(self):
|
def getInfo(self):
|
||||||
temp = requests.get(self.url)
|
temp = requests.get(self.url)
|
||||||
temp.encoding = "utf-8"
|
temp.encoding = "utf-8"
|
||||||
@@ -38,13 +42,17 @@ class mathPage:
|
|||||||
else:
|
else:
|
||||||
self.tree = html.fromstring(self.page)
|
self.tree = html.fromstring(self.page)
|
||||||
self.parsePage()
|
self.parsePage()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def checkPage(self):
    """
    Placeholder for verifying that the page exists before it is parsed.

    Not implemented yet: the method performs no check and returns None.
    """
    return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parsePage(self):
|
def parsePage(self):
|
||||||
name1 = self.tree.xpath('//*[@id="paddingWrapper"]/h2/text()')
|
name1 = self.tree.xpath('//*[@id="paddingWrapper"]/h2/text()')
|
||||||
self.name = str(name1[0]).replace(" "," ").strip()
|
self.name = str(name1[0]).replace(" "," ").strip()
|
||||||
@@ -92,12 +100,8 @@ class mathPage:
|
|||||||
# print(s) ### Debugging
|
# print(s) ### Debugging
|
||||||
advisor.append(str(s).replace(" "," "))
|
advisor.append(str(s).replace(" "," "))
|
||||||
#----------------------------------------
|
#----------------------------------------
|
||||||
|
|
||||||
|
|
||||||
self.advisor = advisor
|
self.advisor = advisor
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
advisorID1 = self.tree.xpath('//*[@id="paddingWrapper"]/p[2]/a/@href')
|
advisorID1 = self.tree.xpath('//*[@id="paddingWrapper"]/p[2]/a/@href')
|
||||||
advisorID = list()
|
advisorID = list()
|
||||||
for s in advisorID1:
|
for s in advisorID1:
|
||||||
@@ -107,7 +111,6 @@ class mathPage:
|
|||||||
if "Chrono" not in s:
|
if "Chrono" not in s:
|
||||||
advisorID.append(int(str(s)[10:]))
|
advisorID.append(int(str(s)[10:]))
|
||||||
|
|
||||||
|
|
||||||
if len(advisorID) == 0:
|
if len(advisorID) == 0:
|
||||||
advisorID.append(0)
|
advisorID.append(0)
|
||||||
if len(advisorID) == 1:
|
if len(advisorID) == 1:
|
||||||
@@ -135,11 +138,16 @@ class mathPage:
|
|||||||
studentsYear.append(s)
|
studentsYear.append(s)
|
||||||
self.studentsYear = studentsYear
|
self.studentsYear = studentsYear
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class mathDB:
|
class mathDB:
|
||||||
def __init__(self, db_file):
|
def __init__(self, db_file):
|
||||||
self.db_file = db_file
|
self.db_file = db_file
|
||||||
self.initDB()
|
self.initDB()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def createConnection(self):
|
def createConnection(self):
|
||||||
""" create a database connection to a SQLite database """
|
""" create a database connection to a SQLite database """
|
||||||
conn = None
|
conn = None
|
||||||
@@ -152,6 +160,8 @@ class mathDB:
|
|||||||
if conn:
|
if conn:
|
||||||
return(conn)
|
return(conn)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def createTable(self, conn, create_table_sql):
|
def createTable(self, conn, create_table_sql):
|
||||||
""" create a table from the create_table_sql statement
|
""" create a table from the create_table_sql statement
|
||||||
:param conn: Connection object
|
:param conn: Connection object
|
||||||
@@ -185,10 +195,15 @@ class mathDB:
|
|||||||
else:
|
else:
|
||||||
print("Cannot create database connection")
|
print("Cannot create database connection")
|
||||||
|
|
||||||
def insertPerson(self, mathPage):
|
|
||||||
conn = self.createConnection()
|
|
||||||
|
def insertPerson(self, mathPage, connection=None):
|
||||||
|
if connection == None:
|
||||||
|
conn = self.createConnection()
|
||||||
|
else:
|
||||||
|
conn = connection
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
row = self.getPerson(mathPage.id)
|
row = self.getPerson(mathPage.id, conn)
|
||||||
if len(row) == 0:
|
if len(row) == 0:
|
||||||
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.getEntry())
|
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.getEntry())
|
||||||
else:
|
else:
|
||||||
@@ -205,8 +220,11 @@ class mathDB:
|
|||||||
WHERE id = ?
|
WHERE id = ?
|
||||||
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.id))
|
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.id))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
if connection == None:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def findMissing(self, limit):
|
def findMissing(self, limit):
|
||||||
"""
|
"""
|
||||||
Find missing entries in database.
|
Find missing entries in database.
|
||||||
@@ -234,6 +252,8 @@ class mathDB:
|
|||||||
conn.close()
|
conn.close()
|
||||||
return(missing)
|
return(missing)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def exists(self, id):
|
def exists(self, id):
|
||||||
conn = sqlite3.connect(self.db_file)
|
conn = sqlite3.connect(self.db_file)
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
@@ -245,11 +265,15 @@ class mathDB:
|
|||||||
else:
|
else:
|
||||||
return(False)
|
return(False)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def fetchMissing(self, limit):
    """
    Look up the entries reported missing by ``findMissing(limit)`` and, when
    there are any, hand them to ``populateDB``.
    """
    gaps = self.findMissing(limit)
    if len(gaps) == 0:
        # Nothing is missing, so there is nothing to populate.
        return
    self.populateDB(gaps)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def checkMissingData(self, id):
|
def checkMissingData(self, id):
|
||||||
conn = sqlite3.connect(self.db_file)
|
conn = sqlite3.connect(self.db_file)
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
@@ -265,16 +289,19 @@ class mathDB:
|
|||||||
return(data)
|
return(data)
|
||||||
# print(data)
|
# print(data)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getPerson(self, id, connection=None):
    """
    Fetch every row of the ``mathematicians`` table whose ``id`` matches.

    :param id: value compared against the ``id`` column
    :param connection: optional open SQLite connection to reuse; when omitted,
        one is obtained from ``self.createConnection()`` and closed before
        returning
    :return: list of matching rows (empty when nothing matches)
    """
    # Only a connection opened here is ours to close. A caller-supplied one
    # must actually be used (it was previously overwritten by a fresh
    # connection) and must stay open for the caller's further statements.
    owns_connection = connection is None
    conn = self.createConnection() if owns_connection else connection
    try:
        cur = conn.cursor()
        # Bind the id as a parameter instead of formatting it into the SQL text.
        cur.execute("SELECT * FROM mathematicians WHERE id = ?", (id,))
        return cur.fetchall()
    finally:
        if owns_connection:
            conn.close()
|
||||||
|
|
||||||
# def _makeLimitIterable(self, limit):
|
# def _makeLimitIterable(self, limit):
|
||||||
@@ -289,6 +316,7 @@ class mathDB:
|
|||||||
|
|
||||||
|
|
||||||
def populateDB(self, limit, chunk = 10):
|
def populateDB(self, limit, chunk = 10):
|
||||||
|
conn = sqlite3.connect(self.db_file)
|
||||||
if type(limit) == int:
|
if type(limit) == int:
|
||||||
limit = range(1, limit+1)
|
limit = range(1, limit+1)
|
||||||
try:
|
try:
|
||||||
@@ -312,7 +340,7 @@ class mathDB:
|
|||||||
|
|
||||||
for j in range(chunk):
|
for j in range(chunk):
|
||||||
print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r')
|
print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r')
|
||||||
self.insertPerson(persons[j])
|
self.insertPerson(persons[j], conn)
|
||||||
|
|
||||||
threads = list()
|
threads = list()
|
||||||
persons = list()
|
persons = list()
|
||||||
@@ -328,18 +356,24 @@ class mathDB:
|
|||||||
|
|
||||||
for j in range(r):
|
for j in range(r):
|
||||||
print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r')
|
print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r')
|
||||||
self.insertPerson(persons[j])
|
self.insertPerson(persons[j], conn)
|
||||||
# print(f"Downloading entry {i} of {limit}", end= '\r')
|
# print(f"Downloading entry {i} of {limit}", end= '\r')
|
||||||
# page = mathPage(i)
|
# page = mathPage(i)
|
||||||
# self.insertPerson(page)
|
# self.insertPerson(page)
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class mathGenealogy:
|
class mathGenealogy:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
total_entries = 297377 # number of records as of 2 October 2023
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
total_entries = 297377 # number of records as of 2 October 2023
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
testDB = mathDB("test.db")
|
testDB = mathDB("test.db")
|
||||||
test = mathPage(0)
|
test = mathPage(0)
|
||||||
|
|||||||
Reference in New Issue
Block a user