mirror of
https://codeberg.org/JasterV/sarscov-hierarchy.git
synced 2026-04-26 18:10:08 +00:00
id to country
This commit is contained in:
parent
39512a31fb
commit
4dba461239
6 changed files with 93 additions and 21 deletions
67
src/python/hierarchy
Normal file
67
src/python/hierarchy
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
graph "Hierarchy Sars-Cov-2" {
|
||||
node [shape=plaintext]
|
||||
"Japan,Hong Kong" -- Japan
|
||||
"Japan,Hong Kong" -- "Hong Kong"
|
||||
"Japan,Hong Kong,Brazil" -- "Japan,Hong Kong"
|
||||
"Japan,Hong Kong,Brazil" -- Brazil
|
||||
"Japan,Hong Kong,Brazil,South Korea" -- "Japan,Hong Kong,Brazil"
|
||||
"Japan,Hong Kong,Brazil,South Korea" -- "South Korea"
|
||||
"Japan,Hong Kong,Brazil,South Korea,Pakistan" -- "Japan,Hong Kong,Brazil,South Korea"
|
||||
"Japan,Hong Kong,Brazil,South Korea,Pakistan" -- Pakistan
|
||||
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam" -- "Japan,Hong Kong,Brazil,South Korea,Pakistan"
|
||||
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam" -- "Viet Nam"
|
||||
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden" -- "Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam"
|
||||
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden" -- Sweden
|
||||
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan" -- "Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden"
|
||||
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan" -- Taiwan
|
||||
"South Africa,Greece" -- "South Africa"
|
||||
"South Africa,Greece" -- Greece
|
||||
"Malaysia,Thailand" -- Malaysia
|
||||
"Malaysia,Thailand" -- Thailand
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan" -- "South Africa,Greece"
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan" -- "Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan"
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan"
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland" -- Finland
|
||||
"Spain,Colombia" -- Spain
|
||||
"Spain,Colombia" -- Colombia
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland"
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France" -- France
|
||||
"India,Nepal" -- India
|
||||
"India,Nepal" -- Nepal
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France"
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey" -- Turkey
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey"
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel" -- Israel
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel"
|
||||
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown" -- Unknown
|
||||
"India,Nepal,Peru" -- "India,Nepal"
|
||||
"India,Nepal,Peru" -- Peru
|
||||
"India,Nepal,Peru,China" -- "India,Nepal,Peru"
|
||||
"India,Nepal,Peru,China" -- China
|
||||
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown" -- "India,Nepal,Peru,China"
|
||||
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown"
|
||||
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA" -- "India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown"
|
||||
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA" -- USA
|
||||
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia" -- "India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA"
|
||||
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia" -- "Spain,Colombia"
|
||||
"Nigeria,Australia" -- Nigeria
|
||||
"Nigeria,Australia" -- Australia
|
||||
"Malaysia,Thailand,Italy" -- "Malaysia,Thailand"
|
||||
"Malaysia,Thailand,Italy" -- Italy
|
||||
"Malaysia,Thailand,Italy,Iran" -- "Malaysia,Thailand,Italy"
|
||||
"Malaysia,Thailand,Italy,Iran" -- Iran
|
||||
"Malaysia,Thailand,Italy,Iran,Philippines" -- "Malaysia,Thailand,Italy,Iran"
|
||||
"Malaysia,Thailand,Italy,Iran,Philippines" -- Philippines
|
||||
"Nigeria,Australia,Tunisia" -- "Nigeria,Australia"
|
||||
"Nigeria,Australia,Tunisia" -- Tunisia
|
||||
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines" -- "Nigeria,Australia,Tunisia"
|
||||
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines" -- "Malaysia,Thailand,Italy,Iran,Philippines"
|
||||
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq" -- "Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines"
|
||||
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq" -- Iraq
|
||||
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia" -- "India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia"
|
||||
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia" -- Serbia
|
||||
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium" -- "Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq"
|
||||
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium" -- Belgium
|
||||
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium,India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia" -- "Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium"
|
||||
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium,India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia" -- "India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia"
|
||||
}
|
||||
BIN
src/python/hierarchy.png
Normal file
BIN
src/python/hierarchy.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 292 KiB |
|
|
@ -18,11 +18,12 @@ def main():
|
|||
csv_table = CsvTable(csv_path).group_countries_by_median_length()
|
||||
ids = csv_table.values('Accession')
|
||||
fasta_map = FastaMap(fasta_path).filter(lambda item: item[0] in ids)
|
||||
csv_table = csv_table.dict_by_id()
|
||||
print("Files processing finished!")
|
||||
|
||||
labels = csv_table.dict_of('Accession', 'Geo_Location')
|
||||
|
||||
print("\nBuilding hierarchy...")
|
||||
fasta_map.build_hierarchy(csv_table)
|
||||
fasta_map.build_hierarchy(labels)
|
||||
print("Done!")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,12 @@ class CsvTable:
|
|||
"""
|
||||
return [row[column] for row in self]
|
||||
|
||||
def dict_of(self, key, value):
    """
    Build a dictionary that maps the cells of one column to the cells of another.

    The two columns are paired row by row, so when the key column contains
    duplicates the value of the last matching row is kept.

    :param key: name of the column whose cells become the dictionary keys
    :param value: name of the column whose cells become the dictionary values
    :return: dict mapping each `key` cell to the `value` cell of the same row
    :raises KeyError: if a row does not contain one of the requested columns
    """
    try:
        return dict(zip(self.values(key), self.values(value)))
    except KeyError as error:
        # Fail loudly with context instead of printing and returning None:
        # a None result would only surface later, as an unrelated error,
        # in whichever caller tries to index the mapping.
        raise KeyError(
            f"Column {error} not found while mapping '{key}' to '{value}'"
        ) from error
|
||||
|
||||
def group_countries_by_median_length(self):
|
||||
"""
|
||||
Filters the csv by country for average length's
|
||||
|
|
@ -72,9 +78,6 @@ class CsvTable:
|
|||
else "Unknown"
|
||||
return row
|
||||
|
||||
def dict_by_id(self):
|
||||
return {elem["Accession"]: elem for elem in self._table}
|
||||
|
||||
@staticmethod
|
||||
def __read(file_path: str) -> List[Dict]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -47,16 +47,14 @@ class FastaMap:
|
|||
"""
|
||||
return FastaMap(filter(function, self))
|
||||
|
||||
def build_hierarchy(self, by_id=None) -> None:
|
||||
def build_hierarchy(self, labels):
|
||||
"""
|
||||
The function that is in charge of the comparison and the hierarchy of the samples
|
||||
:return None:
|
||||
"""
|
||||
if by_id is None:
|
||||
by_id = {}
|
||||
comparisons = self._compare_all_samples(by_id)
|
||||
comparisons = self._compare_all_samples()
|
||||
table = self._to_dict(comparisons)
|
||||
tree = HierarchyTree()
|
||||
tree = HierarchyTree(labels)
|
||||
|
||||
while len(table) > 1:
|
||||
closest_pair = self.__find_closest_pair(table)
|
||||
|
|
@ -80,7 +78,7 @@ class FastaMap:
|
|||
data[rna_id] = rna if len(rna) < 1000 else rna[:1000]
|
||||
return data
|
||||
|
||||
def _compare_all_samples(self, csv_table):
|
||||
def _compare_all_samples(self):
|
||||
# Calculate the number of threads that can be
|
||||
# used in order to speed up the comparisons
|
||||
max_length = max(map(len, self.__data.values()))
|
||||
|
|
@ -94,9 +92,6 @@ class FastaMap:
|
|||
for i in range(len(ids) - 1)
|
||||
for j in range(i + 1, len(ids))]
|
||||
comparisons = sq.par_compare(to_compare, self.__data, str(threads))
|
||||
if csv_table:
|
||||
comparisons = [(csv_table[id1]["Geo_Location"], csv_table[id2]["Geo_Location"], result)
|
||||
for id1, id2, result in comparisons]
|
||||
print(
|
||||
f"Comparisons performed in {time.time() - start_time:.3f} seconds!")
|
||||
return comparisons
|
||||
|
|
|
|||
|
|
@ -2,15 +2,21 @@ from graphviz import Graph
|
|||
|
||||
|
||||
class HierarchyTree:
|
||||
def __init__(self):
|
||||
self.__dot = Graph("Hierarchy Sars-Cov-2", format='svg')
|
||||
def __init__(self, labels):
|
||||
self.__dot = Graph("Hierarchy Sars-Cov-2", format='png',
|
||||
node_attr={'shape': 'plaintext'})
|
||||
self.__labels = labels
|
||||
|
||||
def add_relation(self, pair):
|
||||
node1, node2 = tuple(map(lambda x: str(x).translate(
|
||||
str.maketrans({'(': '', ')': '', "'": ''})), pair))
|
||||
new_node = f"{node1}, {node2}"
|
||||
self.__dot.edge(node1, new_node)
|
||||
self.__dot.edge(node2, new_node)
|
||||
node1, node2 = tuple(map(self.__transform, pair))
|
||||
new_node = f"{node1},{node2}"
|
||||
self.__dot.edge(new_node, node1)
|
||||
self.__dot.edge(new_node, node2)
|
||||
|
||||
def show(self, filename="hierarchy"):
    """
    Render the hierarchy graph through the underlying Graphviz object.

    :param filename: name handed to the graph's `render` call; defaults to
        "hierarchy", the name that was previously hard-coded
    :return: whatever `render` returns (per the graphviz package docs this
        is the path of the rendered file -- confirm against the installed
        version)
    """
    return self.__dot.render(filename)
|
||||
|
||||
def __transform(self, value):
|
||||
value = str(value).translate(
|
||||
str.maketrans({'(': '', ')': '', "'": ''}))
|
||||
return ','.join(map(lambda x: self.__labels[x.strip()], value.split(',')))
|
||||
|
|
|
|||
Loading…
Reference in a new issue