id to country

This commit is contained in:
JasterV 2020-05-21 23:08:02 +02:00
commit 4dba461239
6 changed files with 93 additions and 21 deletions

67
src/python/hierarchy Normal file
View file

@ -0,0 +1,67 @@
graph "Hierarchy Sars-Cov-2" {
node [shape=plaintext]
"Japan,Hong Kong" -- Japan
"Japan,Hong Kong" -- "Hong Kong"
"Japan,Hong Kong,Brazil" -- "Japan,Hong Kong"
"Japan,Hong Kong,Brazil" -- Brazil
"Japan,Hong Kong,Brazil,South Korea" -- "Japan,Hong Kong,Brazil"
"Japan,Hong Kong,Brazil,South Korea" -- "South Korea"
"Japan,Hong Kong,Brazil,South Korea,Pakistan" -- "Japan,Hong Kong,Brazil,South Korea"
"Japan,Hong Kong,Brazil,South Korea,Pakistan" -- Pakistan
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam" -- "Japan,Hong Kong,Brazil,South Korea,Pakistan"
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam" -- "Viet Nam"
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden" -- "Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam"
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden" -- Sweden
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan" -- "Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden"
"Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan" -- Taiwan
"South Africa,Greece" -- "South Africa"
"South Africa,Greece" -- Greece
"Malaysia,Thailand" -- Malaysia
"Malaysia,Thailand" -- Thailand
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan" -- "South Africa,Greece"
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan" -- "Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan"
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan"
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland" -- Finland
"Spain,Colombia" -- Spain
"Spain,Colombia" -- Colombia
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland"
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France" -- France
"India,Nepal" -- India
"India,Nepal" -- Nepal
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France"
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey" -- Turkey
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey"
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel" -- Israel
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel"
"South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown" -- Unknown
"India,Nepal,Peru" -- "India,Nepal"
"India,Nepal,Peru" -- Peru
"India,Nepal,Peru,China" -- "India,Nepal,Peru"
"India,Nepal,Peru,China" -- China
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown" -- "India,Nepal,Peru,China"
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown" -- "South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown"
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA" -- "India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown"
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA" -- USA
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia" -- "India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA"
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia" -- "Spain,Colombia"
"Nigeria,Australia" -- Nigeria
"Nigeria,Australia" -- Australia
"Malaysia,Thailand,Italy" -- "Malaysia,Thailand"
"Malaysia,Thailand,Italy" -- Italy
"Malaysia,Thailand,Italy,Iran" -- "Malaysia,Thailand,Italy"
"Malaysia,Thailand,Italy,Iran" -- Iran
"Malaysia,Thailand,Italy,Iran,Philippines" -- "Malaysia,Thailand,Italy,Iran"
"Malaysia,Thailand,Italy,Iran,Philippines" -- Philippines
"Nigeria,Australia,Tunisia" -- "Nigeria,Australia"
"Nigeria,Australia,Tunisia" -- Tunisia
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines" -- "Nigeria,Australia,Tunisia"
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines" -- "Malaysia,Thailand,Italy,Iran,Philippines"
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq" -- "Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines"
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq" -- Iraq
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia" -- "India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia"
"India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia" -- Serbia
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium" -- "Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq"
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium" -- Belgium
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium,India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia" -- "Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium"
"Nigeria,Australia,Tunisia,Malaysia,Thailand,Italy,Iran,Philippines,Iraq,Belgium,India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia" -- "India,Nepal,Peru,China,South Africa,Greece,Japan,Hong Kong,Brazil,South Korea,Pakistan,Viet Nam,Sweden,Taiwan,Finland,France,Turkey,Israel,Unknown,USA,Spain,Colombia,Serbia"
}

BIN
src/python/hierarchy.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 292 KiB

View file

@ -18,11 +18,12 @@ def main():
csv_table = CsvTable(csv_path).group_countries_by_median_length()
ids = csv_table.values('Accession')
fasta_map = FastaMap(fasta_path).filter(lambda item: item[0] in ids)
csv_table = csv_table.dict_by_id()
print("Files processing finished!")
labels = csv_table.dict_of('Accession', 'Geo_Location')
print("\nBuilding hierarchy...")
fasta_map.build_hierarchy(csv_table)
fasta_map.build_hierarchy(labels)
print("Done!")

View file

@ -49,6 +49,12 @@ class CsvTable:
"""
return [row[column] for row in self]
def dict_of(self, key, value):
    """
    Build a dictionary that maps one column of the table onto another.

    Rows are paired positionally; if the same key appears in several rows,
    the value of the last such row is kept.

    :param key: name of the column whose entries become the dictionary keys
    :param value: name of the column whose entries become the dictionary values
    :return: dict pairing each row's ``key`` entry with its ``value`` entry,
             or None (after printing "KeyError") when a column is missing
    """
    try:
        return dict(zip(self.values(key), self.values(value)))
    except KeyError:
        # Catch only the missing-column case, so that unrelated failures
        # (and Ctrl-C) are no longer swallowed and reported as "KeyError".
        print("KeyError")
        return None
def group_countries_by_median_length(self):
"""
Filters the csv by country for average length's
@ -72,9 +78,6 @@ class CsvTable:
else "Unknown"
return row
def dict_by_id(self):
    """
    Index the rows of the table by their "Accession" entry.

    :return: dict mapping each row's "Accession" value to the row itself;
             when several rows share an accession, the last one is kept
    """
    rows_by_accession = {}
    for row in self._table:
        rows_by_accession[row["Accession"]] = row
    return rows_by_accession
@staticmethod
def __read(file_path: str) -> List[Dict]:
"""

View file

@ -47,16 +47,14 @@ class FastaMap:
"""
return FastaMap(filter(function, self))
def build_hierarchy(self, by_id=None) -> None:
def build_hierarchy(self, labels):
"""
The function that is in charge of the comparison and the hierarchy of the samples
:return None:
"""
if by_id is None:
by_id = {}
comparisons = self._compare_all_samples(by_id)
comparisons = self._compare_all_samples()
table = self._to_dict(comparisons)
tree = HierarchyTree()
tree = HierarchyTree(labels)
while len(table) > 1:
closest_pair = self.__find_closest_pair(table)
@ -80,7 +78,7 @@ class FastaMap:
data[rna_id] = rna if len(rna) < 1000 else rna[:1000]
return data
def _compare_all_samples(self, csv_table):
def _compare_all_samples(self):
# Calculate the number of threads that can be
# used in order to speed up the comparisons
max_length = max(map(len, self.__data.values()))
@ -94,9 +92,6 @@ class FastaMap:
for i in range(len(ids) - 1)
for j in range(i + 1, len(ids))]
comparisons = sq.par_compare(to_compare, self.__data, str(threads))
if csv_table:
comparisons = [(csv_table[id1]["Geo_Location"], csv_table[id2]["Geo_Location"], result)
for id1, id2, result in comparisons]
print(
f"Comparisons performed in {time.time() - start_time:.3f} seconds!")
return comparisons

View file

@ -2,15 +2,21 @@ from graphviz import Graph
class HierarchyTree:
def __init__(self):
self.__dot = Graph("Hierarchy Sars-Cov-2", format='svg')
def __init__(self, labels):
self.__dot = Graph("Hierarchy Sars-Cov-2", format='png',
node_attr={'shape': 'plaintext'})
self.__labels = labels
def add_relation(self, pair):
node1, node2 = tuple(map(lambda x: str(x).translate(
str.maketrans({'(': '', ')': '', "'": ''})), pair))
new_node = f"{node1}, {node2}"
self.__dot.edge(node1, new_node)
self.__dot.edge(node2, new_node)
node1, node2 = tuple(map(self.__transform, pair))
new_node = f"{node1},{node2}"
self.__dot.edge(new_node, node1)
self.__dot.edge(new_node, node2)
def show(self):
    """
    Render the hierarchy graph through graphviz, using "hierarchy" as the
    output file name.
    """
    graph = self.__dot
    graph.render("hierarchy")
def __transform(self, value):
    """
    Convert a node value into a comma-separated string of labels.

    The value is stringified, the characters ``(``, ``)`` and ``'`` (as they
    appear in a tuple repr) are removed, and every comma-separated token is
    replaced by its entry in the labels mapping.

    :param value: a key of the labels mapping, or a (nested) tuple of keys
    :return: the looked-up labels joined with ","
    """
    tuple_syntax = str.maketrans({'(': '', ')': '', "'": ''})
    flattened = str(value).translate(tuple_syntax)
    looked_up = [self.__labels[token.strip()] for token in flattened.split(',')]
    return ','.join(looked_up)