some fixes and add ancestor methods to MathDB class

This commit is contained in:
2023-10-08 21:53:21 +02:00
parent 99a38a456d
commit a941ee38da

View File

@@ -5,6 +5,11 @@ import time
from igraph import Graph
import sqlite3
from sqlite3 import Error
import json
import re
import logging
#logging.warning('Watch out!') # will print a message to the console
#logging.info('I told you so') # will not print anything
class mathPage:
@@ -21,15 +26,17 @@ class mathPage:
self.year = None
self.diss = None
self.advisorID = [None, None, None]
self.students = "{}"
def get_entry(self):
    """Return this page's data as one flat tuple.

    Returns:
        tuple: ``(id, name, title, inst, year, diss, advisor_1, advisor_2,
        advisor_3, students)``. The tuple has ten fields so that it lines
        up with the ten-placeholder INSERT used by the database layer.
    """
    # A stale nine-field return used to sit in front of this one, so the
    # ``students`` field was never handed back.
    return (
        self.id,
        self.name,
        self.title,
        self.inst,
        self.year,
        self.diss,
        self.advisorID[0],
        self.advisorID[1],
        self.advisorID[2],
        self.students,
    )
def get_info(self):
logging.info(f'Downloading info to mathID {self.id} from the online database.')
temp = requests.get(self.url)
temp.encoding = "utf-8"
self.page = temp.text
@@ -138,7 +145,30 @@ class mathPage:
studentsYear.append(s)
self.studentsYear = studentsYear
i = 1
while True:
try:
descText = self.tree.xpath(f'//*[@id="paddingWrapper"]/p[{i}]/text()')
except IndexError:
break
if ("According to our current" in descText[0]) or ("No students known" in descText[0]):
break
i += 1
if ("No students known" in descText[0]):
self.students = json.dumps({})
else:
try:
numStudents = int(''.join(filter(str.isdigit, descText[0])))
except ValueError:
numStudents = 0
try:
numDescendants = int(''.join(filter(str.isdigit, descText[1])))
except ValueError:
numDescendants = 0
self.students = json.dumps({"MathID": studentsID, "Institute": studentsInst, "Year": studentsYear, "Students": numStudents, "Descendants": numDescendants})
class mathDB:
@@ -147,6 +177,15 @@ class mathDB:
self.db_file = db_file
self.init_db()
def get_cursor(self, connection=None):
    """Return a ``(cursor, connection)`` pair for database work.

    Args:
        connection: An open database connection to reuse. When ``None``,
            a new one is obtained from ``self.create_connection()``.

    Returns:
        tuple: ``(cur, conn)`` where ``cur`` is a fresh cursor on ``conn``.
        This method never closes ``conn``; a caller that passed
        ``connection=None`` is the one who has to close it.
    """
    # Identity check: ``== None`` would invoke a custom ``__eq__``.
    conn = self.create_connection() if connection is None else connection
    return conn.cursor(), conn
def create_connection(self):
@@ -188,7 +227,8 @@ class mathDB:
thesis text,
first_advisor integer,
second_advisor integer,
third_advisor integer
third_advisor integer,
students text
);"""
if conn is not None:
self.create_table(conn, sql_create_main_table)
@@ -197,16 +237,30 @@ class mathDB:
print("Cannot create database connection")
def add_person(self, mathID, connection=None):
    """Fetch the page for ``mathID`` and store it unless it is already known.

    Nothing happens when ``self.exists(mathID)`` is true. Otherwise a
    ``mathPage`` is built, filled through ``get_info()`` and handed to
    ``self.insert_person``. A ``ValueError`` raised along the way is
    reported on stdout instead of being propagated.

    Args:
        mathID: Identifier of the person to add.
        connection: Optional open connection forwarded to ``insert_person``.
    """
    if self.exists(mathID):
        return
    try:
        page = mathPage(mathID)
        page.get_info()
        self.insert_person(page, connection=connection)
    except ValueError:
        print(f"Error: No entry with mathID {mathID} exists.")
def insert_person(self, mathPage, connection=None):
entry = mathPage.get_entry()
if entry[1] == None:
raise ValueError("No such entry in the math genealogy project found.")
return
if connection == None:
conn = self.create_connection()
else:
conn = connection
cur = conn.cursor()
row = self.get_person(mathPage.id, conn)
if len(row) == 0:
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.get_entry())
if row[0] == 0:
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?,?)", entry)
else:
cur.execute("""
UPDATE mathematicians
@@ -217,21 +271,61 @@ class mathDB:
thesis = ?,
first_advisor = ?,
second_advisor = ?,
third_advisor = ?
third_advisor = ?,
students = ?
WHERE id = ?
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.id))
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.students, mathPage.id))
conn.commit()
if connection == None:
conn.close()
def find_missing(self, limit):
def get_students(self, mathID, connection=None):
    """Return the ids of all stored people who list ``mathID`` as an advisor.

    If ``mathID`` is not in the database yet, ``self.add_person`` is called
    first so the person gets stored.

    Args:
        mathID: Identifier of the advisor.
        connection: Optional open connection to reuse. A connection opened
            here (``connection is None``) is always closed before returning.

    Returns:
        list: Ids of rows whose first, second or third advisor equals
        ``mathID``; empty when there are none.
    """
    cur, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID):
            try:
                self.add_person(mathID, connection=connection)
            except ValueError:
                print(f"Error: No entry with mathID {mathID} exists.")
                return []
        cur.execute(
            """
            SELECT id
            FROM mathematicians
            WHERE first_advisor = ?
            OR second_advisor = ?
            OR third_advisor = ?
            """,
            (mathID, mathID, mathID),
        )
        return [row[0] for row in cur.fetchall()]
    finally:
        # Runs on every exit path, including the early return and errors,
        # so a connection opened here can no longer leak.
        if connection is None:
            conn.close()
def get_students_entry(self, mathID, connection=None):
    """Return the student ids stored in the ``students`` field of a person.

    The person is added through ``self.add_person`` first when it is not
    in the database. The ``students`` field (index 9 of the row returned by
    ``self.get_person``) is expected to hold a JSON object with a
    ``"MathID"`` key.

    Args:
        mathID: Identifier of the person whose students are wanted.
        connection: Optional open connection to reuse. A connection opened
            here (``connection is None``) is always closed before returning.

    Returns:
        The value stored under ``"MathID"``, or an empty list when the
        field is missing, empty, not a JSON object, or has no such key
        (for example the ``"{}"`` stored for people without known students).
    """
    _, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID):
            try:
                self.add_person(mathID, connection)
            except ValueError:
                print(f"Error: No entry with mathID {mathID} exists.")
        person = self.get_person(mathID, connection=conn)
        raw = person[9] if len(person) > 9 else None
        # A missing person comes back as a row of zeros, so only decode
        # real text instead of letting json.loads raise on an int.
        students = json.loads(raw) if isinstance(raw, str) and raw else {}
        if not isinstance(students, dict):
            return []
        return students.get("MathID", [])
    finally:
        # The close used to sit after the return and was never reached.
        if connection is None:
            conn.close()
def find_missing(self, limit, connection=None):
"""
Find missing entries in database.
"""
conn = sqlite3.connect(self.db_file)
cur = conn.cursor()
# conn = sqlite3.connect(self.db_file)
cur,conn = self.get_cursor(connection)
missing = []
if type(limit) == int:
limit = range(1, limit+1)
@@ -250,17 +344,18 @@ class mathDB:
return(missing)
if p == 0:
missing.append(i)
conn.close()
if connection == None:
conn.close()
return(missing)
def exists(self, id):
conn = sqlite3.connect(self.db_file)
cur = conn.cursor()
cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (id,))
def exists(self, mathID, connection=None):
cur,conn = self.get_cursor(connection)
cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (mathID,),)
p = cur.fetchall()[0][0]
conn.close()
if connection == None:
conn.close()
if p == 1:
return(True)
else:
@@ -276,34 +371,34 @@ class mathDB:
def check_missing_data(self, id, connection=None):
    """Return the stored row for ``id``, or a tuple of ones when absent.

    Args:
        id: Identifier to look up in the ``mathematicians`` table.
        connection: Optional open connection to reuse. The body already
            referred to ``connection`` without it being a parameter, which
            raised ``NameError``; it is now an optional argument. A
            connection opened here is closed before returning.

    Returns:
        tuple: The full row for ``id``, or ``(1, 1, 1, 1, 1, 1, 1, 1)`` when
        no row with that id exists.
    """
    cur, conn = self.get_cursor(connection)
    try:
        # One parameterised query replaces the EXISTS check plus SELECT.
        cur.execute("SELECT * FROM mathematicians WHERE id = ?", (id,))
        row = cur.fetchone()
    finally:
        if connection is None:
            conn.close()
    if row is None:
        return (1, 1, 1, 1, 1, 1, 1, 1)
    return row
# print(data)
def get_person(self, id, connection=None):
    """Return the database row for ``id``.

    Args:
        id: Identifier to look up in the ``mathematicians`` table.
        connection: Optional open connection to reuse. A connection opened
            here (``connection is None``) is closed before returning.

    Returns:
        tuple: The full row for ``id``. When no such row exists, a
        placeholder of ten zeros is returned, so callers can test
        ``row[0] == 0`` to detect a missing person.
    """
    cur, conn = self.get_cursor(connection)
    try:
        # Bound parameter only: the id is never formatted into the SQL text.
        cur.execute("SELECT * FROM mathematicians WHERE id = ?", (id,))
        row = cur.fetchone()
    finally:
        if connection is None:
            conn.close()
    if row is None:
        return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    return row
# def _makeLimitIterable(self, limit):
# if type(limit) == int:
@@ -315,6 +410,65 @@ class mathDB:
# print("Wrong parameter. Limit must be an iterable object or an integer")
# return(ret)
def get_ancestors(self, mathID, depth=0, connection=None):
    """Collect the ids of ``mathID`` and its academic ancestors.

    Ancestors are found by following the three advisor fields (row indices
    6 to 8) of the rows returned by ``self.get_person``, level by level.

    Args:
        mathID: Identifier of the starting person. It is added through
            ``self.add_person`` first when it is not in the database.
        depth: Number of advisor generations to include. ``0`` means no
            limit; a negative value yields just ``{mathID}``.
        connection: Optional open connection to reuse. A connection opened
            here (``connection is None``) is closed before returning.

    Returns:
        set: ``mathID`` plus every ancestor id within ``depth`` generations.
    """
    _, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID):
            self.add_person(mathID, connection=conn)
        seen = {mathID}
        frontier = {mathID}
        level = 0
        while frontier and (depth == 0 or level < depth):
            parents = set()
            for person_id in frontier:
                row = self.get_person(person_id, connection=conn)
                for advisor in row[6:9]:
                    # NULL advisors come back as None; comparing None > 0
                    # raises TypeError, so filter them out explicitly.
                    if advisor is not None and advisor > 0:
                        parents.add(advisor)
            # Expanding only unseen ids gives the same result on acyclic
            # data and makes the walk terminate on a cyclic advisor record.
            frontier = parents - seen
            seen |= frontier
            level += 1
        return seen
    finally:
        if connection is None:
            conn.close()
def add_ancestors(self, mathID, depth=0, connection=None):
    """Add ``mathID`` and its academic ancestors to the database.

    Starting from ``mathID``, the three advisor fields (row indices 6 to 8)
    of each stored person are followed level by level, and every advisor
    not yet stored is added through ``self.add_person``.

    Args:
        mathID: Identifier of the starting person.
        depth: Generation limit. ``0`` means no limit; a negative value
            adds nothing beyond ``mathID``. With a positive limit the
            people at level ``depth`` are still expanded, so advisors up to
            ``depth + 1`` generations away get stored (behaviour kept
            unchanged from the previous implementation).
        connection: Optional open connection to reuse. A connection opened
            here (``connection is None``) is closed before returning.
    """
    _, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID):
            self.add_person(mathID, connection=conn)
        seen = {mathID}
        frontier = {mathID}
        level = 0
        while frontier and (depth == 0 or level <= depth):
            parents = set()
            for person_id in frontier:
                # Use the connection opened above instead of the caller's
                # (possibly None) argument, so no extra ones are opened.
                row = self.get_person(person_id, connection=conn)
                for advisor in row[6:9]:
                    # NULL advisors come back as None; None > 0 would raise.
                    if advisor is not None and advisor > 0:
                        parents.add(advisor)
            # Skipping already visited ids keeps the walk finite even if
            # the advisor data contains a cycle.
            frontier = parents - seen
            seen |= frontier
            for person_id in frontier:
                if not self.exists(person_id):
                    self.add_person(person_id, connection=conn)
            level += 1
    finally:
        if connection is None:
            conn.close()
def add_decendants(self, mathID, depth=0, connection=None):
    """Placeholder for adding descendants; currently does nothing."""
    return None
def populate_db(self, limit, chunk = 10):
conn = sqlite3.connect(self.db_file)
@@ -335,14 +489,11 @@ class mathDB:
thread = threading.Thread(target = persons[j].get_info)
threads.append(thread)
thread.start()
for j, thread in enumerate(threads):
thread.join()
for j in range(chunk):
print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r')
self.insert_person(persons[j], conn)
threads = list()
persons = list()
for j in range(r):
@@ -351,18 +502,14 @@ class mathDB:
thread = threading.Thread(target = persons[j].get_info)
threads.append(thread)
thread.start()
for j, thread in enumerate(threads):
thread.join()
for j in range(r):
print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r')
self.insert_person(persons[j], conn)
# print(f"Downloading entry {i} of {limit}", end= '\r')
# page = mathPage(i)
# self.insert_person(page)
conn.close()
@@ -373,17 +520,55 @@ class mathGenealogy(Graph):
def __init__(self, DB="MathGen.db", vertices = None, directed=True):
    """Set up the graph together with its backing database.

    Args:
        DB: Database file name handed to ``mathDB``.
        vertices: Accepted but not used by this constructor.
        directed: Forwarded to the parent class constructor.
    """
    # The database handle is created before the parent initialiser runs,
    # exactly as before.
    self.db = mathDB(DB)
    super().__init__(directed=directed)
    # Start with an empty "name" vertex attribute.
    self.vs["name"] = ""
def add_person(self, mathID, root=0, line=0, force=False):
    """Add the person ``mathID`` to the graph as a vertex.

    The person is read from ``self.db``. When the database has no row for
    it (``get_person`` returns a row whose first field is ``0``), the page
    is downloaded through ``mathPage`` and stored first.

    Args:
        mathID: Identifier of the person; its string form is the vertex name.
        root: Value forwarded to ``self._insert_person``.
        line: Value forwarded to ``self._insert_person``.
        force: When true, the attributes of an already existing vertex are
            overwritten; otherwise an existing vertex is left untouched.

    Raises:
        ValueError: If the downloaded page has no name, i.e. no such entry
            was found online.
    """
    person = self.db.get_person(mathID)
    if person[0] == 0:
        page = mathPage(mathID)
        page.get_info()
        if page.get_entry()[1] is None:
            raise ValueError("No such entry in the math genealogy project found.")
        self.db.insert_person(page)
        person = self.db.get_person(mathID)

    vertex_name = str(mathID)
    if vertex_name not in self.vs['name']:
        vID = self.add_vertex(vertex_name).index
        self._insert_person(person, vID, root, line)
    elif force:
        # Look the vertex up on this graph; the old code used an undefined
        # name ``graph`` here and failed with NameError.
        vID = self.vs.find(name=vertex_name).index
        self._insert_person(person, vID, root, line)
def _insert_person(self, person, vID, root, line):
students = json.loads(person[9])
self.vs[vID]["Name"] = person[1]
self.vs[vID]["Title"] = person[2]
self.vs[vID]["Year"] = str(person[3])
self.vs[vID]["Dissertation"] = person[4]
self.vs[vID]["Institution"] = person[5]
self.vs[vID]["Students"] = students["MathID"]
self.vs[vID]["Advisors"] = person[6:8]
self.vs[vID]["Line"] = line
self.vs[vID]["roots"] = [root]
# if "name" in vertex.keys():
# self.add_vertex(vertex["name"])
# else:
# print("Mandatory entry 'name' is missing")
# return()
# for key in vertex:
# val = vertex[key]
# print(f"Dictionary contains {key} with value {val}")
@@ -430,6 +615,17 @@ class mathGenealogy(Graph):
# Record count of the online database, followed by the MathIDs of a few
# individual people (presumably kept as handy inputs for manual testing —
# confirm against how they are used further down).
total_entries = 297377 # number of records as of 2 October 2023
Greven_ID = 29360 # Andreas Greven
Aumann_ID= 36548 # Georg Aumann
Anita_ID = 92324 # Anita
Fourier_ID = 17981 # Fourier
Wolfgang_ID = 150286 # Wolfgang
Eichelsbacher_ID = 27275 # Peter
Anton_ID = 125956 # Anton
Pfaffelhuber_ID = 157881 # Peter Pfaffelhuber
Ruess_ID = 75966 # Ruess
if __name__ == "__main__":
testDB = mathDB("test.db")
test = mathPage(0)