Apply some fixes and add ancestor methods to the mathDB class
This commit is contained in:
292
MathGen.py
292
MathGen.py
@@ -5,6 +5,11 @@ import time
|
||||
from igraph import Graph
|
||||
import sqlite3
|
||||
from sqlite3 import Error
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
#logging.warning('Watch out!') # will print a message to the console
|
||||
#logging.info('I told you so') # will not print anything
|
||||
|
||||
|
||||
class mathPage:
|
||||
@@ -21,15 +26,17 @@ class mathPage:
|
||||
self.year = None
|
||||
self.diss = None
|
||||
self.advisorID = [None, None, None]
|
||||
self.students = "{}"
|
||||
|
||||
|
||||
|
||||
def get_entry(self):
    """Return this page's fields as the 10-tuple written to the database.

    The order is: id, name, title, inst, year, diss, the three advisor IDs,
    and ``students`` (a JSON string). It matches the ten positional
    placeholders used when inserting into ``mathematicians``.
    """
    # A stale 9-field return used to sit in front of this one, which made the
    # ``students`` field unreachable; only the 10-field form is kept.
    return (
        self.id,
        self.name,
        self.title,
        self.inst,
        self.year,
        self.diss,
        self.advisorID[0],
        self.advisorID[1],
        self.advisorID[2],
        self.students,
    )
|
||||
|
||||
|
||||
|
||||
def get_info(self):
|
||||
logging.info(f'Downloading info to mathID {self.id} from the online database.')
|
||||
temp = requests.get(self.url)
|
||||
temp.encoding = "utf-8"
|
||||
self.page = temp.text
|
||||
@@ -138,7 +145,30 @@ class mathPage:
|
||||
studentsYear.append(s)
|
||||
self.studentsYear = studentsYear
|
||||
|
||||
i = 1
|
||||
while True:
|
||||
try:
|
||||
descText = self.tree.xpath(f'//*[@id="paddingWrapper"]/p[{i}]/text()')
|
||||
except IndexError:
|
||||
break
|
||||
if ("According to our current" in descText[0]) or ("No students known" in descText[0]):
|
||||
break
|
||||
i += 1
|
||||
|
||||
if ("No students known" in descText[0]):
|
||||
self.students = json.dumps({})
|
||||
else:
|
||||
try:
|
||||
numStudents = int(''.join(filter(str.isdigit, descText[0])))
|
||||
except ValueError:
|
||||
numStudents = 0
|
||||
try:
|
||||
numDescendants = int(''.join(filter(str.isdigit, descText[1])))
|
||||
except ValueError:
|
||||
numDescendants = 0
|
||||
self.students = json.dumps({"MathID": studentsID, "Institute": studentsInst, "Year": studentsYear, "Students": numStudents, "Descendants": numDescendants})
|
||||
|
||||
|
||||
|
||||
|
||||
class mathDB:
|
||||
@@ -147,6 +177,15 @@ class mathDB:
|
||||
self.db_file = db_file
|
||||
self.init_db()
|
||||
|
||||
|
||||
def get_cursor(self, connection=None):
    """Return a ``(cursor, connection)`` pair for database work.

    Args:
        connection: An already-open connection to reuse. When ``None``, a new
            one is obtained from ``self.create_connection()``.

    Returns:
        tuple: ``(cursor, connection)``. The caller decides whether to close
        the connection (typically only when it passed ``None`` here).
    """
    # Identity test: ``None`` is a singleton, so ``is`` is the correct check.
    conn = self.create_connection() if connection is None else connection
    return (conn.cursor(), conn)
|
||||
|
||||
|
||||
|
||||
def create_connection(self):
|
||||
@@ -188,7 +227,8 @@ class mathDB:
|
||||
thesis text,
|
||||
first_advisor integer,
|
||||
second_advisor integer,
|
||||
third_advisor integer
|
||||
third_advisor integer,
|
||||
students text
|
||||
);"""
|
||||
if conn is not None:
|
||||
self.create_table(conn, sql_create_main_table)
|
||||
@@ -197,16 +237,30 @@ class mathDB:
|
||||
print("Cannot create database connection")
|
||||
|
||||
|
||||
def add_person(self, mathID, connection=None):
    """Download and store the entry for ``mathID`` unless it is already stored.

    Args:
        mathID: Identifier of the person to add.
        connection: Optional open connection, forwarded to both the existence
            check and the insert so that all work happens on one connection.

    A ``ValueError`` raised while building or inserting the page is reported
    on stdout and swallowed; nothing is returned.
    """
    # Forward the caller's connection to the existence check as well, instead
    # of letting it open (and close) a second one.
    if self.exists(mathID, connection=connection):
        return
    try:
        p = mathPage(mathID)
        p.get_info()
        self.insert_person(p, connection=connection)
    except ValueError:
        print(f"Error: No entry with mathID {mathID} exists.")
|
||||
|
||||
|
||||
|
||||
def insert_person(self, mathPage, connection=None):
|
||||
entry = mathPage.get_entry()
|
||||
if entry[1] == None:
|
||||
raise ValueError("No such entry in the math genealogy project found.")
|
||||
return
|
||||
if connection == None:
|
||||
conn = self.create_connection()
|
||||
else:
|
||||
conn = connection
|
||||
cur = conn.cursor()
|
||||
row = self.get_person(mathPage.id, conn)
|
||||
if len(row) == 0:
|
||||
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.get_entry())
|
||||
if row[0] == 0:
|
||||
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?,?)", entry)
|
||||
else:
|
||||
cur.execute("""
|
||||
UPDATE mathematicians
|
||||
@@ -217,21 +271,61 @@ class mathDB:
|
||||
thesis = ?,
|
||||
first_advisor = ?,
|
||||
second_advisor = ?,
|
||||
third_advisor = ?
|
||||
third_advisor = ?,
|
||||
students = ?
|
||||
WHERE id = ?
|
||||
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.id))
|
||||
""", (mathPage.name, mathPage.title, mathPage.inst, mathPage.year, mathPage.diss, mathPage.advisorID[0], mathPage.advisorID[1],mathPage.advisorID[2], mathPage.students, mathPage.id))
|
||||
conn.commit()
|
||||
if connection == None:
|
||||
conn.close()
|
||||
|
||||
|
||||
|
||||
def find_missing(self, limit):
|
||||
|
||||
def get_students(self, mathID, connection=None):
    """Return the IDs of all stored people who list ``mathID`` as an advisor.

    Args:
        mathID: Identifier of the advisor.
        connection: Optional open connection to reuse. When ``None``, a
            connection is obtained via ``self.get_cursor`` and closed before
            returning.

    Returns:
        list: ``id`` values of rows whose first, second or third advisor
        equals ``mathID``; empty if there are none or the advisor could not
        be added.
    """
    cur, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID, connection=conn):
            try:
                self.add_person(mathID, connection=conn)
            except ValueError:
                print(f"Error: No entry with mathID {mathID} exists.")
                return []
        cur.execute(
            """
            SELECT id
            FROM mathematicians
            WHERE first_advisor = ?
            OR second_advisor = ?
            OR third_advisor = ?
            """,
            (mathID, mathID, mathID),
        )
        return [row[0] for row in cur.fetchall()]
    finally:
        # Close only a connection we opened ourselves, on every exit path
        # (the early return above previously leaked it).
        if connection is None:
            conn.close()
|
||||
|
||||
def get_students_entry(self, mathID, connection=None):
    """Return the student IDs recorded in the stored ``students`` JSON field.

    Args:
        mathID: Identifier of the person whose students are wanted.
        connection: Optional open connection to reuse. When ``None``, a
            connection is obtained via ``self.get_cursor`` and closed before
            returning.

    Returns:
        The value under the ``"MathID"`` key of the JSON stored in column 9,
        or an empty list when that column holds no JSON object or the key is
        absent (for example the ``{}`` written for "No students known").
    """
    _cur, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID, connection=conn):
            try:
                self.add_person(mathID, connection=conn)
            except ValueError:
                print(f"Error: No entry with mathID {mathID} exists.")
        p = self.get_person(mathID, connection=conn)
        raw = p[9]
        # get_person yields a tuple of zeros for unknown IDs, and older rows
        # may hold NULL here; neither is decodable JSON.
        students = json.loads(raw) if isinstance(raw, str) else {}
        if not isinstance(students, dict):
            return []
        return students.get("MathID", [])
    finally:
        # Previously the close sat after the return and never ran.
        if connection is None:
            conn.close()
|
||||
|
||||
|
||||
|
||||
|
||||
def find_missing(self, limit, connection=None):
|
||||
"""
|
||||
Find missing entries in database.
|
||||
"""
|
||||
conn = sqlite3.connect(self.db_file)
|
||||
cur = conn.cursor()
|
||||
# conn = sqlite3.connect(self.db_file)
|
||||
cur,conn = self.get_cursor(connection)
|
||||
missing = []
|
||||
if type(limit) == int:
|
||||
limit = range(1, limit+1)
|
||||
@@ -250,17 +344,18 @@ class mathDB:
|
||||
return(missing)
|
||||
if p == 0:
|
||||
missing.append(i)
|
||||
conn.close()
|
||||
if connection == None:
|
||||
conn.close()
|
||||
return(missing)
|
||||
|
||||
|
||||
|
||||
def exists(self, id):
|
||||
conn = sqlite3.connect(self.db_file)
|
||||
cur = conn.cursor()
|
||||
cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (id,))
|
||||
def exists(self, mathID, connection=None):
|
||||
cur,conn = self.get_cursor(connection)
|
||||
cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (mathID,),)
|
||||
p = cur.fetchall()[0][0]
|
||||
conn.close()
|
||||
if connection == None:
|
||||
conn.close()
|
||||
if p == 1:
|
||||
return(True)
|
||||
else:
|
||||
@@ -276,34 +371,34 @@ class mathDB:
|
||||
|
||||
|
||||
def check_missing_data(self, id, connection=None):
    """Return the stored row for ``id``, or a tuple of eight ones if absent.

    Args:
        id: Identifier to look up in ``mathematicians``.
        connection: Optional open connection to reuse. The body already
            referred to ``connection``; it is now a real (defaulted)
            parameter, so existing one-argument calls keep working. When
            ``None``, a connection is obtained via ``self.get_cursor`` and
            closed before returning.

    Returns:
        tuple: The first matching row, or ``(1,1,1,1,1,1,1,1)`` when no row
        with this ``id`` exists.
    """
    cur, conn = self.get_cursor(connection)
    try:
        cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (id,))
        if cur.fetchall()[0][0] == 0:
            return (1, 1, 1, 1, 1, 1, 1, 1)
        cur.execute("SELECT * FROM mathematicians WHERE id = ?", (id,))
        return cur.fetchall()[0]
    finally:
        # Close only a connection opened here, and only once.
        if connection is None:
            conn.close()
|
||||
# print(data)
|
||||
|
||||
|
||||
|
||||
def get_person(self, id, connection=None):
    """Fetch the stored row for ``id``.

    Args:
        id: Identifier to look up in ``mathematicians``.
        connection: Optional open connection to reuse. When ``None``, a
            connection is obtained via ``self.get_cursor`` and closed before
            returning.

    Returns:
        tuple: The first matching row, or a tuple of ten zeros when no row
        exists (callers test ``row[0] == 0`` for "not found").
    """
    cur, conn = self.get_cursor(connection)
    try:
        # Bound parameter instead of interpolating ``id`` into the SQL text.
        cur.execute("SELECT * FROM mathematicians WHERE id = ?", (id,))
        rows = cur.fetchall()
    finally:
        if connection is None:
            conn.close()
    if not rows:
        return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    return rows[0]
|
||||
|
||||
|
||||
# def _makeLimitIterable(self, limit):
|
||||
# if type(limit) == int:
|
||||
@@ -315,6 +410,65 @@ class mathDB:
|
||||
# print("Wrong parameter. Limit must be an iterable object or an integer")
|
||||
# return(ret)
|
||||
|
||||
def get_ancestors(self, mathID, depth=0, connection=None):
    """Collect the IDs of ``mathID`` and its advisors, generation by generation.

    Args:
        mathID: Identifier of the starting person; added to the database
            first if it is not stored yet.
        depth: Number of advisor generations to follow; ``0`` means no limit.
        connection: Optional open connection to reuse. When ``None``, a
            connection is obtained via ``self.get_cursor`` and closed before
            returning.

    Returns:
        set: ``mathID`` plus every advisor ID reached within ``depth``
        generations, read from columns 6-8 of each stored row.
    """
    _cur, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID, connection=conn):
            self.add_person(mathID, connection=conn)
        anc = {mathID}
        anc_new = {mathID}
        level = 0
        while anc_new:
            if depth != 0 and level > depth:
                break
            parents = set()
            for a in anc_new:
                p = self.get_person(a, connection=conn)
                # Advisor columns may be NULL (None); comparing None with 0
                # would raise, so accept positive integers only.
                parents.update(
                    adv for adv in p[6:9] if isinstance(adv, int) and adv > 0
                )
            anc |= anc_new
            # Skip people already collected so a cyclic record cannot loop forever.
            anc_new = parents - anc
            level += 1
        return anc
    finally:
        if connection is None:
            conn.close()
|
||||
|
||||
|
||||
|
||||
def add_ancestors(self, mathID, depth=0, connection=None):
    """Make sure ``mathID`` and its advisors are stored, generation by generation.

    Args:
        mathID: Identifier of the starting person.
        depth: Number of advisor generations to follow; ``0`` means no limit.
        connection: Optional open connection to reuse. When ``None``, a
            connection is obtained via ``self.get_cursor`` and closed at the
            end.

    For each generation the advisor IDs are read from columns 6-8 of the
    stored rows, and any advisor not yet stored is added through
    ``self.add_person`` before the next generation is read. Returns ``None``.
    """
    _cur, conn = self.get_cursor(connection)
    try:
        if not self.exists(mathID, connection=conn):
            self.add_person(mathID, connection=conn)
        anc = {mathID}
        anc_new = {mathID}
        level = 0
        while anc_new:
            if depth != 0 and level > depth:
                break
            parents = set()
            for a in anc_new:
                p = self.get_person(a, connection=conn)
                # Advisor columns may be NULL (None); comparing None with 0
                # would raise, so accept positive integers only.
                parents.update(
                    adv for adv in p[6:9] if isinstance(adv, int) and adv > 0
                )
            anc |= anc_new
            # Skip people already handled so a cyclic record cannot loop forever.
            anc_new = parents - anc
            for a in anc_new:
                if not self.exists(a, connection=conn):
                    self.add_person(a, connection=conn)
            level += 1
    finally:
        if connection is None:
            conn.close()
|
||||
|
||||
# Placeholder: mirrors the signature of add_ancestors, but the body is a bare
# `pass`, so calling it currently does nothing and returns None.
# NOTE(review): the name is spelled "decendants"; callers must use this spelling.
def add_decendants(self, mathID, depth=0, connection=None):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
def populate_db(self, limit, chunk = 10):
|
||||
conn = sqlite3.connect(self.db_file)
|
||||
@@ -335,14 +489,11 @@ class mathDB:
|
||||
thread = threading.Thread(target = persons[j].get_info)
|
||||
threads.append(thread)
|
||||
thread.start()
|
||||
|
||||
for j, thread in enumerate(threads):
|
||||
thread.join()
|
||||
|
||||
for j in range(chunk):
|
||||
print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r')
|
||||
self.insert_person(persons[j], conn)
|
||||
|
||||
threads = list()
|
||||
persons = list()
|
||||
for j in range(r):
|
||||
@@ -351,18 +502,14 @@ class mathDB:
|
||||
thread = threading.Thread(target = persons[j].get_info)
|
||||
threads.append(thread)
|
||||
thread.start()
|
||||
|
||||
for j, thread in enumerate(threads):
|
||||
|
||||
thread.join()
|
||||
|
||||
for j in range(r):
|
||||
print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r')
|
||||
self.insert_person(persons[j], conn)
|
||||
# print(f"Downloading entry {i} of {limit}", end= '\r')
|
||||
# page = mathPage(i)
|
||||
# self.insert_person(page)
|
||||
|
||||
conn.close()
|
||||
|
||||
|
||||
@@ -373,17 +520,55 @@ class mathGenealogy(Graph):
|
||||
# Build an empty genealogy graph backed by the mathDB stored at `DB`.
# `directed` is forwarded to the parent Graph constructor; `vertices` is
# accepted but not used in the lines visible here.
# NOTE(review): assigning self.vs["name"] presumably creates the "name" vertex
# attribute up front so later `self.vs['name']` lookups work on an empty
# graph - confirm against the igraph documentation.
def __init__(self, DB="MathGen.db", vertices = None, directed=True):
|
||||
self.db = mathDB(DB)
|
||||
super().__init__(directed=directed)
|
||||
#self.vs["name"] = ""
|
||||
self.vs["name"] = ""
|
||||
|
||||
def add_person(self, vertex):
|
||||
if "name" in vertex.keys():
|
||||
self.add_vertex(vertex["name"])
|
||||
else:
|
||||
print("Mandatory entry 'name' is missing")
|
||||
return()
|
||||
for key in vertex:
|
||||
val = vertex[key]
|
||||
print(f"Dictionary contains {key} with value {val}")
|
||||
def add_person(self, mathID, root=0, line=0, force=False):
    """Add the person ``mathID`` to the graph as a vertex named ``str(mathID)``.

    The row is read from ``self.db``. If it is not stored yet (the lookup
    returns a row whose first field is 0), the page is downloaded, inserted
    into the database and read back.

    Args:
        mathID: Identifier of the person.
        root: Forwarded to ``_insert_person``.
        line: Forwarded to ``_insert_person``.
        force: When true, refresh the attributes of an existing vertex
            instead of leaving it untouched.

    Raises:
        ValueError: If the downloaded page carries no name, i.e. no such
            entry exists.
    """
    person = self.db.get_person(mathID)
    if person[0] == 0:
        p = mathPage(mathID)
        p.get_info()
        if p.get_entry()[1] is None:
            raise ValueError("No such entry in the math genealogy project found.")
        self.db.insert_person(p)
        person = self.db.get_person(mathID)

    key = str(mathID)
    if key not in self.vs['name']:
        vID = self.add_vertex(key).index
        self._insert_person(person, vID, root, line)
    elif force:
        # Look the vertex up on this graph; the previous code referenced an
        # undefined global ``graph`` here.
        vID = self.vs.find(name=key).index
        self._insert_person(person, vID, root, line)
|
||||
|
||||
|
||||
def _insert_person(self, person, vID, root, line):
    """Copy a database row onto the attributes of vertex ``vID``.

    Args:
        person: Row in database order: id, name, title, institution, year,
            dissertation, three advisor IDs, students (JSON string).
        vID: Index of the target vertex in ``self.vs``.
        root: Stored as the single element of the ``"roots"`` list.
        line: Stored under ``"Line"``.
    """
    raw = person[9]
    # The students column may be NULL, or the "{}" written when no students
    # are known; both mean an empty student list.
    students = json.loads(raw) if isinstance(raw, str) else {}
    vertex = self.vs[vID]
    vertex["Name"] = person[1]
    vertex["Title"] = person[2]
    # Row order is (..., inst, year, diss, ...): indices 3, 4 and 5.
    vertex["Institution"] = person[3]
    vertex["Year"] = str(person[4])
    vertex["Dissertation"] = person[5]
    vertex["Students"] = students.get("MathID", [])
    # All three advisor columns (indices 6, 7 and 8).
    vertex["Advisors"] = person[6:9]
    vertex["Line"] = line
    vertex["roots"] = [root]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# if "name" in vertex.keys():
|
||||
# self.add_vertex(vertex["name"])
|
||||
# else:
|
||||
# print("Mandatory entry 'name' is missing")
|
||||
# return()
|
||||
# for key in vertex:
|
||||
# val = vertex[key]
|
||||
# print(f"Dictionary contains {key} with value {val}")
|
||||
|
||||
|
||||
|
||||
@@ -430,6 +615,17 @@ class mathGenealogy(Graph):
|
||||
|
||||
total_entries = 297377 # number of records as of 2 October 2023
|
||||
|
||||
Greven_ID = 29360 # Andreas Greven
|
||||
Aumann_ID= 36548 # Georg Aumann
|
||||
Anita_ID = 92324 # Anita
|
||||
Fourier_ID = 17981 # Fourier
|
||||
Wolfgang_ID = 150286 # Wolfgang
|
||||
Eichelsbacher_ID = 27275 # Peter
|
||||
Anton_ID = 125956 # Anton
|
||||
Pfaffelhuber_ID = 157881 # Peter Pfaffelhuber
|
||||
Ruess_ID = 75966 # Ruess
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
testDB = mathDB("test.db")
|
||||
test = mathPage(0)
|
||||
|
||||
Reference in New Issue
Block a user