Some fixes; add ancestor methods to the mathDB class
This commit is contained in:
292
MathGen.py
292
MathGen.py
@@ -5,6 +5,11 @@ import time
|
|||||||
from igraph import Graph
|
from igraph import Graph
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from sqlite3 import Error
|
from sqlite3 import Error
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
#logging.warning('Watch out!') # will print a message to the console
|
||||||
|
#logging.info('I told you so') # will not print anything
|
||||||
|
|
||||||
|
|
||||||
class mathPage:
|
class mathPage:
|
||||||
@@ -21,15 +26,17 @@ class mathPage:
|
|||||||
self.year = None
|
self.year = None
|
||||||
self.diss = None
|
self.diss = None
|
||||||
self.advisorID = [None, None, None]
|
self.advisorID = [None, None, None]
|
||||||
|
self.students = "{}"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_entry(self):
    """Return this page's data as one flat 10-tuple.

    The order is: id, name, title, inst, year, diss, the three advisor
    IDs, and the students JSON string.  It must stay in step with the
    ten ``?`` placeholders of the INSERT statement in ``insert_person``.
    """
    return (
        self.id,
        self.name,
        self.title,
        self.inst,
        self.year,
        self.diss,
        self.advisorID[0],
        self.advisorID[1],
        self.advisorID[2],
        self.students,
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
|
logging.info(f'Downloading info to mathID {self.id} from the online database.')
|
||||||
temp = requests.get(self.url)
|
temp = requests.get(self.url)
|
||||||
temp.encoding = "utf-8"
|
temp.encoding = "utf-8"
|
||||||
self.page = temp.text
|
self.page = temp.text
|
||||||
@@ -138,7 +145,30 @@ class mathPage:
|
|||||||
studentsYear.append(s)
|
studentsYear.append(s)
|
||||||
self.studentsYear = studentsYear
|
self.studentsYear = studentsYear
|
||||||
|
|
||||||
|
i = 1
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
descText = self.tree.xpath(f'//*[@id="paddingWrapper"]/p[{i}]/text()')
|
||||||
|
except IndexError:
|
||||||
|
break
|
||||||
|
if ("According to our current" in descText[0]) or ("No students known" in descText[0]):
|
||||||
|
break
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
if ("No students known" in descText[0]):
|
||||||
|
self.students = json.dumps({})
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
numStudents = int(''.join(filter(str.isdigit, descText[0])))
|
||||||
|
except ValueError:
|
||||||
|
numStudents = 0
|
||||||
|
try:
|
||||||
|
numDescendants = int(''.join(filter(str.isdigit, descText[1])))
|
||||||
|
except ValueError:
|
||||||
|
numDescendants = 0
|
||||||
|
self.students = json.dumps({"MathID": studentsID, "Institute": studentsInst, "Year": studentsYear, "Students": numStudents, "Descendants": numDescendants})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class mathDB:
|
class mathDB:
|
||||||
@@ -147,6 +177,15 @@ class mathDB:
|
|||||||
self.db_file = db_file
|
self.db_file = db_file
|
||||||
self.init_db()
|
self.init_db()
|
||||||
|
|
||||||
|
|
||||||
|
def get_cursor(self, connection=None):
    """Return a ``(cursor, connection)`` pair.

    Args:
        connection: an already open database connection to reuse.  When
            it is ``None`` a new one is obtained from
            ``self.create_connection()``.

    Returns:
        A tuple ``(cur, conn)``.  If the connection was created here, the
        caller is responsible for closing it.
    """
    conn = self.create_connection() if connection is None else connection
    return (conn.cursor(), conn)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def create_connection(self):
|
def create_connection(self):
|
||||||
@@ -188,7 +227,8 @@ class mathDB:
|
|||||||
thesis text,
|
thesis text,
|
||||||
first_advisor integer,
|
first_advisor integer,
|
||||||
second_advisor integer,
|
second_advisor integer,
|
||||||
third_advisor integer
|
third_advisor integer,
|
||||||
|
students text
|
||||||
);"""
|
);"""
|
||||||
if conn is not None:
|
if conn is not None:
|
||||||
self.create_table(conn, sql_create_main_table)
|
self.create_table(conn, sql_create_main_table)
|
||||||
@@ -197,16 +237,30 @@ class mathDB:
|
|||||||
print("Cannot create database connection")
|
print("Cannot create database connection")
|
||||||
|
|
||||||
|
|
||||||
|
def add_person(self, mathID, connection=None):
    """Download the entry for ``mathID`` and store it, unless already stored.

    Args:
        mathID: ID of the person to add.
        connection: optional open database connection; it is handed on to
            ``exists`` and ``insert_person`` so they do not each open
            their own.

    A ``ValueError`` raised while building or inserting the page is not
    propagated; an error message is printed instead.
    """
    if self.exists(mathID, connection=connection):
        return
    try:
        page = mathPage(mathID)
        page.get_info()
        self.insert_person(page, connection=connection)
    except ValueError:
        print(f"Error: No entry with mathID {mathID} exists.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def insert_person(self, mathPage, connection=None):
|
def insert_person(self, mathPage, connection=None):
|
||||||
|
entry = mathPage.get_entry()
|
||||||
|
if entry[1] == None:
|
||||||
|
raise ValueError("No such entry in the math genealogy project found.")
|
||||||
|
return
|
||||||
if connection == None:
|
if connection == None:
|
||||||
conn = self.create_connection()
|
conn = self.create_connection()
|
||||||
else:
|
else:
|
||||||
conn = connection
|
conn = connection
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
row = self.get_person(mathPage.id, conn)
|
row = self.get_person(mathPage.id, conn)
|
||||||
if len(row) == 0:
|
if row[0] == 0:
|
||||||
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.get_entry())
|
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?,?)", entry)
|
||||||
else:
|
else:
|
||||||
cur.execute("""
|
cur.execute("""
|
||||||
UPDATE mathematicians
|
UPDATE mathematicians
|
||||||
@@ -217,21 +271,61 @@ class mathDB:
|
|||||||
thesis = ?,
|
thesis = ?,
|
||||||
first_advisor = ?,
|
first_advisor = ?,
|
||||||
second_advisor = ?,
|
second_advisor = ?,
|
||||||
third_advisor = ?
|
third_advisor = ?,
|
||||||
|
students = ?
|
||||||
WHERE id = ?
|
WHERE id = ?
|
||||||
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.id))
|
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.students, mathPage.id))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
if connection == None:
|
if connection == None:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_missing(self, limit):
|
|
||||||
|
def get_students(self, mathID, connection=None):
    """Return the IDs of all stored people who list ``mathID`` as an advisor.

    If ``mathID`` is not in the database yet, ``add_person`` is asked to
    add it first.

    Args:
        mathID: ID of the advisor.
        connection: optional open database connection to reuse.

    Returns:
        A list of IDs (possibly empty).
    """
    cur, conn = self.get_cursor(connection)
    students = []
    try:
        if not self.exists(mathID, connection=conn):
            try:
                self.add_person(mathID, connection=conn)
            except ValueError:
                # NOTE(review): add_person appears to handle ValueError
                # itself, so this branch may never run; kept as a guard.
                print(f"Error: No entry with mathID {mathID} exists.")
                return students
        cur.execute(
            """
            SELECT id
            FROM mathematicians
            WHERE first_advisor = ?
               OR second_advisor = ?
               OR third_advisor = ?
            """,
            (mathID, mathID, mathID),
        )
        students = [row[0] for row in cur.fetchall()]
    finally:
        # Only close connections opened here, also on early return/error.
        if connection is None:
            conn.close()
    return students
|
||||||
|
|
||||||
|
def get_students_entry(self, mathID, connection=None):
    """Return the student IDs recorded in the stored entry of ``mathID``.

    The list is read from the ``"MathID"`` key of the JSON string held at
    index 9 of the row returned by ``get_person``.  If ``mathID`` is not
    stored yet, ``add_person`` is asked to add it first.

    Args:
        mathID: ID of the person whose entry is read.
        connection: optional open database connection to reuse.

    Returns:
        The list stored under ``"MathID"``, or ``[]`` when the entry has
        no such key or no JSON string at all.
    """
    _cur, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID, connection=conn):
            try:
                self.add_person(mathID, connection=conn)
            except ValueError:
                print(f"Error: No entry with mathID {mathID} exists.")
        person = self.get_person(mathID, connection=conn)
    finally:
        # Close before returning; the original returned first and so
        # never reached its close call.
        if connection is None:
            conn.close()

    raw = person[9]
    if not isinstance(raw, str):
        # Missing row placeholder (0) or NULL column: nothing to decode.
        return []
    return json.loads(raw).get("MathID", [])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def find_missing(self, limit, connection=None):
|
||||||
"""
|
"""
|
||||||
Find missing entries in database.
|
Find missing entries in database.
|
||||||
"""
|
"""
|
||||||
conn = sqlite3.connect(self.db_file)
|
# conn = sqlite3.connect(self.db_file)
|
||||||
cur = conn.cursor()
|
cur,conn = self.get_cursor(connection)
|
||||||
missing = []
|
missing = []
|
||||||
if type(limit) == int:
|
if type(limit) == int:
|
||||||
limit = range(1, limit+1)
|
limit = range(1, limit+1)
|
||||||
@@ -250,17 +344,18 @@ class mathDB:
|
|||||||
return(missing)
|
return(missing)
|
||||||
if p == 0:
|
if p == 0:
|
||||||
missing.append(i)
|
missing.append(i)
|
||||||
conn.close()
|
if connection == None:
|
||||||
|
conn.close()
|
||||||
return(missing)
|
return(missing)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def exists(self, id):
|
def exists(self, mathID, connection=None):
|
||||||
conn = sqlite3.connect(self.db_file)
|
cur,conn = self.get_cursor(connection)
|
||||||
cur = conn.cursor()
|
cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (mathID,),)
|
||||||
cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (id,))
|
|
||||||
p = cur.fetchall()[0][0]
|
p = cur.fetchall()[0][0]
|
||||||
conn.close()
|
if connection == None:
|
||||||
|
conn.close()
|
||||||
if p == 1:
|
if p == 1:
|
||||||
return(True)
|
return(True)
|
||||||
else:
|
else:
|
||||||
@@ -276,34 +371,34 @@ class mathDB:
|
|||||||
|
|
||||||
|
|
||||||
def check_missing_data(self, id, connection=None):
    """Return the stored row for ``id``, or a tuple of eight 1s if absent.

    Args:
        id: ID of the person to look up.
        connection: optional open database connection to reuse.  The
            parameter is new with a default, so existing one-argument
            calls keep working; the body previously referred to
            ``connection`` without it being defined.

    Returns:
        The first matching row of ``mathematicians``, or
        ``(1, 1, 1, 1, 1, 1, 1, 1)`` when no row has this id.
    """
    cur, conn = self.get_cursor(connection)
    try:
        cur.execute("SELECT * FROM mathematicians WHERE id = ?", (id,))
        rows = cur.fetchall()
    finally:
        if connection is None:
            conn.close()
    if not rows:
        return (1, 1, 1, 1, 1, 1, 1, 1)
    return rows[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_person(self, id, connection=None):
    """Return the stored row for ``id``.

    Args:
        id: ID of the person to look up.
        connection: optional open database connection to reuse.

    Returns:
        The first matching row of ``mathematicians``, or a tuple of ten
        zeros when no row has this id (callers test ``row[0] == 0``).
    """
    cur, conn = self.get_cursor(connection)
    try:
        # Parameterized query: the id is bound, never formatted into SQL.
        cur.execute("SELECT * FROM mathematicians WHERE id = ?", (id,))
        rows = cur.fetchall()
    finally:
        if connection is None:
            conn.close()
    if not rows:
        return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    return rows[0]
|
||||||
|
|
||||||
|
|
||||||
# def _makeLimitIterable(self, limit):
|
# def _makeLimitIterable(self, limit):
|
||||||
# if type(limit) == int:
|
# if type(limit) == int:
|
||||||
@@ -315,6 +410,65 @@ class mathDB:
|
|||||||
# print("Wrong parameter. Limit must be an iterable object or an integer")
|
# print("Wrong parameter. Limit must be an iterable object or an integer")
|
||||||
# return(ret)
|
# return(ret)
|
||||||
|
|
||||||
|
def get_ancestors(self, mathID, depth=0, connection=None):
    """Collect the IDs of the stored ancestors (advisors of advisors) of a person.

    Advisors are read from indices 6 to 8 of the rows returned by
    ``get_person``; only positive integers count as advisor IDs.  People
    who are not in the database contribute no further ancestors.

    Args:
        mathID: ID of the starting person; added via ``add_person`` if it
            is not stored yet.
        depth: number of generations to follow; ``0`` means no limit.
        connection: optional open database connection to reuse.

    Returns:
        A set containing ``mathID`` and every ancestor ID found.
    """
    _cur, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID, connection=conn):
            self.add_person(mathID, connection=conn)

        ancestors = {mathID}
        frontier = {mathID}
        generation = 0
        while frontier and (depth == 0 or generation <= depth):
            parents = set()
            for person_id in frontier:
                row = self.get_person(person_id, connection=conn)
                for advisor in row[6:9]:
                    # NULL advisor columns come back as None; skip them
                    # instead of comparing None with 0.
                    if isinstance(advisor, int) and advisor > 0:
                        parents.add(advisor)
            ancestors |= frontier
            # Never revisit a person: keeps cyclic data from looping forever.
            frontier = parents - ancestors
            generation += 1
        return ancestors
    finally:
        if connection is None:
            conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def add_ancestors(self, mathID, depth=0, connection=None):
    """Add the ancestors (advisors of advisors) of a person to the database.

    Walks the advisor IDs at indices 6 to 8 of the rows returned by
    ``get_person`` generation by generation and calls ``add_person`` for
    every advisor that ``exists`` reports as missing.

    Args:
        mathID: ID of the starting person; added first if not stored yet.
        depth: number of generations to follow; ``0`` means no limit.
        connection: optional open database connection to reuse.

    Returns:
        None.
    """
    _cur, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID, connection=conn):
            self.add_person(mathID, connection=conn)

        seen = {mathID}
        frontier = {mathID}
        generation = 0
        while frontier and (depth == 0 or generation <= depth):
            parents = set()
            for person_id in frontier:
                row = self.get_person(person_id, connection=conn)
                for advisor in row[6:9]:
                    # NULL advisor columns come back as None; skip them.
                    if isinstance(advisor, int) and advisor > 0:
                        parents.add(advisor)
            seen |= frontier
            # Never revisit a person: keeps cyclic data from looping forever.
            frontier = parents - seen
            # Store the next generation before it is read in the next pass.
            for advisor_id in frontier:
                if not self.exists(advisor_id, connection=conn):
                    self.add_person(advisor_id, connection=conn)
            generation += 1
    finally:
        if connection is None:
            conn.close()
|
||||||
|
|
||||||
|
def add_decendants(self, mathID, depth=0, connection=None):
    """Placeholder with no behaviour yet; accepts its arguments and returns None."""
    return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def populate_db(self, limit, chunk = 10):
|
def populate_db(self, limit, chunk = 10):
|
||||||
conn = sqlite3.connect(self.db_file)
|
conn = sqlite3.connect(self.db_file)
|
||||||
@@ -335,14 +489,11 @@ class mathDB:
|
|||||||
thread = threading.Thread(target = persons[j].get_info)
|
thread = threading.Thread(target = persons[j].get_info)
|
||||||
threads.append(thread)
|
threads.append(thread)
|
||||||
thread.start()
|
thread.start()
|
||||||
|
|
||||||
for j, thread in enumerate(threads):
|
for j, thread in enumerate(threads):
|
||||||
thread.join()
|
thread.join()
|
||||||
|
|
||||||
for j in range(chunk):
|
for j in range(chunk):
|
||||||
print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r')
|
print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r')
|
||||||
self.insert_person(persons[j], conn)
|
self.insert_person(persons[j], conn)
|
||||||
|
|
||||||
threads = list()
|
threads = list()
|
||||||
persons = list()
|
persons = list()
|
||||||
for j in range(r):
|
for j in range(r):
|
||||||
@@ -351,18 +502,14 @@ class mathDB:
|
|||||||
thread = threading.Thread(target = persons[j].get_info)
|
thread = threading.Thread(target = persons[j].get_info)
|
||||||
threads.append(thread)
|
threads.append(thread)
|
||||||
thread.start()
|
thread.start()
|
||||||
|
|
||||||
for j, thread in enumerate(threads):
|
for j, thread in enumerate(threads):
|
||||||
|
|
||||||
thread.join()
|
thread.join()
|
||||||
|
|
||||||
for j in range(r):
|
for j in range(r):
|
||||||
print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r')
|
print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r')
|
||||||
self.insert_person(persons[j], conn)
|
self.insert_person(persons[j], conn)
|
||||||
# print(f"Downloading entry {i} of {limit}", end= '\r')
|
# print(f"Downloading entry {i} of {limit}", end= '\r')
|
||||||
# page = mathPage(i)
|
# page = mathPage(i)
|
||||||
# self.insert_person(page)
|
# self.insert_person(page)
|
||||||
|
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
@@ -373,17 +520,55 @@ class mathGenealogy(Graph):
|
|||||||
def __init__(self, DB="MathGen.db", vertices = None, directed=True):
    """Create the graph and attach a ``mathDB`` opened on ``DB``.

    Args:
        DB: database file handed to ``mathDB``.
        vertices: accepted but not used in this method.
        directed: forwarded to the parent class constructor.
    """
    database = mathDB(DB)
    self.db = database
    super().__init__(directed=directed)
    # Presumably registers the "name" vertex attribute so that later
    # lookups of self.vs["name"] work on an empty graph -- TODO confirm.
    self.vs["name"] = ""
|
||||||
|
|
||||||
def add_person(self, mathID, root=0, line=0, force=False):
    """Add the person ``mathID`` as a vertex, fetching the entry if needed.

    If the database has no row for ``mathID`` (``get_person`` returns a
    row whose first field is 0), the page is downloaded, inserted into
    the database and read back.

    Args:
        mathID: ID of the person; the vertex name is ``str(mathID)``.
        root: passed on to ``_insert_person``.
        line: passed on to ``_insert_person``.
        force: when the vertex already exists, rewrite its attributes
            instead of leaving it untouched.

    Raises:
        ValueError: if the downloaded page has no name, i.e. no such
            entry was found.
    """
    person = self.db.get_person(mathID)
    if person[0] == 0:
        page = mathPage(mathID)
        page.get_info()
        if page.get_entry()[1] is None:
            raise ValueError("No such entry in the math genealogy project found.")
        self.db.insert_person(page)
        person = self.db.get_person(mathID)

    vertex_name = str(mathID)
    if vertex_name not in self.vs['name']:
        # NOTE(review): relies on add_vertex returning the new vertex.
        vertex_index = self.add_vertex(vertex_name).index
    elif force:
        # Look the vertex up on this graph (was an undefined name `graph`).
        vertex_index = self.vs.find(name=vertex_name).index
    else:
        return
    self._insert_person(person, vertex_index, root, line)
|
||||||
|
|
||||||
|
|
||||||
|
def _insert_person(self, person, vID, root, line):
|
||||||
|
students = json.loads(person[9])
|
||||||
|
self.vs[vID]["Name"] = person[1]
|
||||||
|
self.vs[vID]["Title"] = person[2]
|
||||||
|
self.vs[vID]["Year"] = str(person[3])
|
||||||
|
self.vs[vID]["Dissertation"] = person[4]
|
||||||
|
self.vs[vID]["Institution"] = person[5]
|
||||||
|
self.vs[vID]["Students"] = students["MathID"]
|
||||||
|
self.vs[vID]["Advisors"] = person[6:8]
|
||||||
|
self.vs[vID]["Line"] = line
|
||||||
|
self.vs[vID]["roots"] = [root]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# if "name" in vertex.keys():
|
||||||
|
# self.add_vertex(vertex["name"])
|
||||||
|
# else:
|
||||||
|
# print("Mandatory entry 'name' is missing")
|
||||||
|
# return()
|
||||||
|
# for key in vertex:
|
||||||
|
# val = vertex[key]
|
||||||
|
# print(f"Dictionary contains {key} with value {val}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -430,6 +615,17 @@ class mathGenealogy(Graph):
|
|||||||
|
|
||||||
total_entries = 297377  # number of records as of 2 October 2023

# IDs of individual people (presumably MathIDs -- confirm); names as noted.
Greven_ID = 29360  # Andreas Greven
Aumann_ID = 36548  # Georg Aumann
Anita_ID = 92324  # Anita
Fourier_ID = 17981  # Fourier
Wolfgang_ID = 150286  # Wolfgang
Eichelsbacher_ID = 27275  # Peter
Anton_ID = 125956  # Anton
Pfaffelhuber_ID = 157881  # Peter Pfaffelhuber
Ruess_ID = 75966  # Ruess
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
testDB = mathDB("test.db")
|
testDB = mathDB("test.db")
|
||||||
test = mathPage(0)
|
test = mathPage(0)
|
||||||
|
|||||||
Reference in New Issue
Block a user