switch camelCase to snake_case

This commit is contained in:
2023-10-06 21:11:04 +02:00
parent 94a3dc862b
commit b2a48447b5

View File

@@ -2,7 +2,7 @@ from lxml import html, etree
import requests import requests
import threading import threading
import time import time
import igraph as ig from igraph import Graph
import sqlite3 import sqlite3
from sqlite3 import Error from sqlite3 import Error
@@ -22,14 +22,14 @@ class mathPage:
self.diss = None self.diss = None
self.advisorID = [None, None, None] self.advisorID = [None, None, None]
def getEntry(self):
def get_entry(self):
return((self.id, self.name, self.title, self.inst, self.year, self.diss, self.advisorID[0], self.advisorID[1], self.advisorID[2])) return((self.id, self.name, self.title, self.inst, self.year, self.diss, self.advisorID[0], self.advisorID[1], self.advisorID[2]))
def getInfo(self): def get_info(self):
temp = requests.get(self.url) temp = requests.get(self.url)
temp.encoding = "utf-8" temp.encoding = "utf-8"
self.page = temp.text self.page = temp.text
@@ -41,7 +41,7 @@ class mathPage:
self.advisorID = [None, None, None] self.advisorID = [None, None, None]
else: else:
self.tree = html.fromstring(self.page) self.tree = html.fromstring(self.page)
self.parsePage() self.parse_page()
@@ -53,7 +53,7 @@ class mathPage:
def parsePage(self): def parse_page(self):
name1 = self.tree.xpath('//*[@id="paddingWrapper"]/h2/text()') name1 = self.tree.xpath('//*[@id="paddingWrapper"]/h2/text()')
self.name = str(name1[0]).replace(" "," ").strip() self.name = str(name1[0]).replace(" "," ").strip()
@@ -142,13 +142,14 @@ class mathPage:
class mathDB: class mathDB:
def __init__(self, db_file): def __init__(self, db_file):
self.db_file = db_file self.db_file = db_file
self.initDB() self.init_db()
def createConnection(self): def create_connection(self):
""" create a database connection to a SQLite database """ """ create a database connection to a SQLite database """
conn = None conn = None
try: try:
@@ -162,7 +163,7 @@ class mathDB:
def createTable(self, conn, create_table_sql): def create_table(self, conn, create_table_sql):
""" create a table from the create_table_sql statement """ create a table from the create_table_sql statement
:param conn: Connection object :param conn: Connection object
:param create_table_sql: a CREATE TABLE statement :param create_table_sql: a CREATE TABLE statement
@@ -176,8 +177,8 @@ class mathDB:
def initDB(self): def init_db(self):
conn = self.createConnection() conn = self.create_connection()
sql_create_main_table = """CREATE TABLE IF NOT EXISTS mathematicians ( sql_create_main_table = """CREATE TABLE IF NOT EXISTS mathematicians (
id integer PRIMARY KEY, id integer PRIMARY KEY,
name text NOT NULL, name text NOT NULL,
@@ -190,22 +191,22 @@ class mathDB:
third_advisor integer third_advisor integer
);""" );"""
if conn is not None: if conn is not None:
self.createTable(conn, sql_create_main_table) self.create_table(conn, sql_create_main_table)
conn.close() conn.close()
else: else:
print("Cannot create database connection") print("Cannot create database connection")
def insertPerson(self, mathPage, connection=None): def insert_person(self, mathPage, connection=None):
if connection == None: if connection == None:
conn = self.createConnection() conn = self.create_connection()
else: else:
conn = connection conn = connection
cur = conn.cursor() cur = conn.cursor()
row = self.getPerson(mathPage.id, conn) row = self.get_person(mathPage.id, conn)
if len(row) == 0: if len(row) == 0:
cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.getEntry()) cur.execute("INSERT INTO mathematicians VALUES (?,?,?,?,?,?,?,?,?)", mathPage.get_entry())
else: else:
cur.execute(""" cur.execute("""
UPDATE mathematicians UPDATE mathematicians
@@ -225,7 +226,7 @@ class mathDB:
def findMissing(self, limit): def find_missing(self, limit):
""" """
Find missing entries in database. Find missing entries in database.
""" """
@@ -267,14 +268,14 @@ class mathDB:
def fetchMissing(self, limit): def fetch_missing(self, limit):
missing = self.findMissing(limit) missing = self.find_missing(limit)
if len(missing) > 0: if len(missing) > 0:
self.populateDB(missing) self.populate_db(missing)
def checkMissingData(self, id): def check_missing_data(self, id):
conn = sqlite3.connect(self.db_file) conn = sqlite3.connect(self.db_file)
cur = conn.cursor() cur = conn.cursor()
cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (id,)) cur.execute("SELECT EXISTS(SELECT * FROM mathematicians WHERE id = ?)", (id,))
@@ -291,12 +292,12 @@ class mathDB:
def getPerson(self, id, connection=None): def get_person(self, id, connection=None):
if connection == None: if connection == None:
conn = self.createConnection() conn = self.create_connection()
else: else:
conn = connection conn = connection
conn = self.createConnection() conn = self.create_connection()
cur = conn.cursor() cur = conn.cursor()
res = cur.execute(f"SELECT * FROM mathematicians WHERE id = {id}") res = cur.execute(f"SELECT * FROM mathematicians WHERE id = {id}")
res = cur.fetchall() res = cur.fetchall()
@@ -315,7 +316,7 @@ class mathDB:
# return(ret) # return(ret)
def populateDB(self, limit, chunk = 10): def populate_db(self, limit, chunk = 10):
conn = sqlite3.connect(self.db_file) conn = sqlite3.connect(self.db_file)
if type(limit) == int: if type(limit) == int:
limit = range(1, limit+1) limit = range(1, limit+1)
@@ -331,7 +332,7 @@ class mathDB:
for j in range(chunk): for j in range(chunk):
persons.append(mathPage(limit[i*chunk+j])) persons.append(mathPage(limit[i*chunk+j]))
# print(f"Creating MathID {limit[i*chunk+j]}") # print(f"Creating MathID {limit[i*chunk+j]}")
thread = threading.Thread(target = persons[j].getInfo) thread = threading.Thread(target = persons[j].get_info)
threads.append(thread) threads.append(thread)
thread.start() thread.start()
@@ -340,34 +341,80 @@ class mathDB:
for j in range(chunk): for j in range(chunk):
print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r') print(f"Adding MathID {limit[i*chunk+j]} to database. Entry {i*chunk+j+1} of {l}.", end= '\r')
self.insertPerson(persons[j], conn) self.insert_person(persons[j], conn)
threads = list() threads = list()
persons = list() persons = list()
for j in range(r): for j in range(r):
persons.append(mathPage(limit[n*chunk+j])) persons.append(mathPage(limit[n*chunk+j]))
# print(f"Creating MathID {limit[n*chunk+j]}") # print(f"Creating MathID {limit[n*chunk+j]}")
thread = threading.Thread(target = persons[j].getInfo) thread = threading.Thread(target = persons[j].get_info)
threads.append(thread) threads.append(thread)
thread.start() thread.start()
for j, thread in enumerate(threads): for j, thread in enumerate(threads):
thread.join() thread.join()
for j in range(r): for j in range(r):
print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r') print(f"Adding MathID {limit[n*chunk+j]} to database. Entry {n*chunk+j+1} of {l}.", end= '\r')
self.insertPerson(persons[j], conn) self.insert_person(persons[j], conn)
# print(f"Downloading entry {i} of {limit}", end= '\r') # print(f"Downloading entry {i} of {limit}", end= '\r')
# page = mathPage(i) # page = mathPage(i)
# self.insertPerson(page) # self.insert_person(page)
conn.close() conn.close()
class mathGenealogy: class mathGenealogy(Graph):
pass
def __init__(self, DB="MathGen.db", vertices = None, directed=True):
    """Initialise the genealogy graph.

    Calls the parent class constructor with ``directed`` and then assigns
    the empty string to the ``"name"`` entry of ``self.vs``.

    :param DB: defaults to ``"MathGen.db"``; not used in this body.
    :param vertices: defaults to ``None``; not used in this body.
    :param directed: forwarded unchanged to the parent constructor.
    """
    super().__init__(directed=directed)
    # NOTE(review): presumably pre-declares a "name" vertex attribute on the
    # (still empty) graph -- confirm against the igraph documentation.
    self.vs["name"] = ""
def add_vertex
def _wrap_string(self, string, wrap):
    """Wrap ``string`` into italic lines of at most ``wrap`` characters.

    The text is broken at the last space found within the first
    ``wrap + 1`` characters of what is still to be emitted.  Each resulting
    line is enclosed in ``<i>...</i>`` and lines are separated by ``<br/>``.

    If no space is available in that window (a single word longer than
    ``wrap``), the word is hard-broken after ``wrap`` characters.  Previously
    this case left the remaining text unchanged and the loop never ended.

    :param string: text to wrap.
    :param wrap: maximum number of characters per line.
    :return: the wrapped text as a markup string.
    """
    lines = []
    while len(string) > wrap:
        # Same window as string[:wrap + 1].rfind(' '), without the copy.
        cut = string.rfind(' ', 0, wrap + 1)
        if cut < 0:
            # No break opportunity: force progress so the loop terminates.
            step = max(wrap, 1)
            lines.append(string[:step])
            string = string[step:]
        else:
            lines.append(string[:cut])
            string = string[cut + 1:]
    lines.append(string)
    return '<i>' + '</i><br/><i>'.join(lines) + '</i>'
def _wrap_institute(self, string):
    """Insert a line break before every stand-alone "and" in ``string``.

    Each occurrence of ``' and '`` (with a space on both sides) is replaced
    by ``'<br/>and '``, so every institution after the first starts on its
    own line.

    The earlier loop tested for ``' and '`` but took the split position from
    the bare substring ``'and'``.  That substring also occurs inside words
    such as "Maryland" or "Netherlands", which truncated the name and
    produced spurious breaks.  Only the space-delimited word is split on now.

    :param string: institution text, possibly listing several institutions.
    :return: the text with ``<br/>`` inserted before each stand-alone "and".
    """
    return string.replace(' and ', '<br/>and ')
def _make_nice_label(self, vID):
    """Build the markup label for the vertex ``vID``.

    The label has the form ``<`` ... ``>`` and consists of:

    * the vertex's ``"Name"`` in bold at point size 18,
    * the ``"Dissertation"`` wrapped at 60 characters by ``_wrap_string``
      (omitted when empty or whitespace only),
    * the ``"Institution"`` passed through ``_wrap_institute``
      (``'Unknown'`` when empty or whitespace only),
    * the ``"Year"`` in bold (``'?'`` when empty or whitespace only).

    ``&`` is replaced by ``&amp;`` in the name, dissertation and institution.
    Previously only the dissertation was escaped, so a name or institution
    such as "Texas A&M University" ended up in the markup unescaped.

    NOTE(review): the markup looks like a Graphviz HTML-like label -- confirm
    with the code that renders it.
    NOTE(review): the attribute keys are capitalised here ("Name", ...) while
    the constructor assigns a lower-case "name" attribute -- verify which
    spelling the graph-building code actually uses.

    :param vID: index used to look the vertex up in ``self.vs``.
    :return: the label as a string.
    """
    vertex = self.vs[vID]

    name = vertex["Name"].replace("&", "&amp;")
    label = "<<b><font point-size='18'>" + name + "</font></b><br/>"

    diss = vertex["Dissertation"]
    if diss and not diss.isspace():
        # Escape after wrapping so line lengths are measured on the raw
        # text; the markup added by the wrapper contains no '&'.
        label += self._wrap_string(diss, 60).replace("&", "&amp;") + "<br/>"

    inst = vertex["Institution"]
    if inst and not inst.isspace():
        inst = self._wrap_institute(inst).replace("&", "&amp;")
    else:
        inst = 'Unknown'

    year = vertex["Year"]
    # NOTE(review): assumes "Year" is a string or None -- confirm.
    if not year or year.isspace():
        year = '?'

    return label + inst + ", <b>" + year + "</b>>"