Skip to content

Commit abadcf1

Browse files
author
Sanjay Gopala Krishna
committed
added
1 parent 8eb6611 commit abadcf1

File tree

4 files changed

+740
-43
lines changed

4 files changed

+740
-43
lines changed

.DS_Store

0 Bytes
Binary file not shown.

build/lib/cli/cluster_search.py

Lines changed: 245 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,14 +1230,7 @@ def make_datatable(self, correlation_dict):
12301230
lambda x: x.split('/')[-1].replace('.txt', ''))
12311231
df['Correlation'] = df['Correlation'].apply(lambda x: round(x, 2))
12321232
#Add KLD from kld_clust_group_kld
1233-
self.console.log(f"Adding KLD to the dataframe")
1234-
self.console.log(f"self.kld_df: {self.kld_df}")
1235-
# self.console.log(f"self.kld_df.columns: {self.kld_df.columns}")
12361233
if self.kld_df is not None:
1237-
# kld_df = pd.DataFrame(self.kld_df)
1238-
# kld_df['Cluster'] = kld_df['Cluster'].apply(
1239-
# lambda x: x.split('/')[-1].split('.')[1])
1240-
# kld_df['KLD'] = kld_df['KLD'].apply(lambda x: round(x, 2))
12411234
try:
12421235
df['KLD'] = df['Cluster'].apply(
12431236
lambda x: self.kld_df.loc[
@@ -1259,8 +1252,12 @@ def make_datatable(self, correlation_dict):
12591252
df = df.reset_index(drop=True)
12601253
df = df[['Cluster', 'HLA', 'Correlation', 'KLD']]
12611254

1262-
1263-
1255+
# self.console.log(
1256+
# f"Correlation data:\n{df}", style="bold green")
1257+
fig, ax = plot_kld_pcc_distance(self.kld_df,df)
1258+
fig.savefig(os.path.join(self._outfolder, 'corr-data', 'kld_pcc_distance.png'))
1259+
self.console.log(
1260+
f"Saved KLD and PCC distance plot at {os.path.join(self._outfolder, 'corr-data', 'kld_pcc_distance.png')}", style="bold green")
12641261
return df
12651262

12661263
def process_correlation_data(self, df=None):
@@ -1434,7 +1431,7 @@ def make_datatable_html(self, correlation_dict, df=None):
14341431
<td>{df['Cluster'][i]}</td>
14351432
<td>{df['HLA'][i]}</td>
14361433
<td>{df['Correlation'][i]}</td>
1437-
<td>{df['KLD'][i]}</td>
1434+
<td>{round(df['KLD'][i], 4) if df['KLD'][i] != 'NA' else 'NA'}</td>
14381435
</tr>
14391436
"""
14401437
if df['Correlation'][i] < 0.5:
@@ -1444,7 +1441,7 @@ def make_datatable_html(self, correlation_dict, df=None):
14441441
<td>{df['Cluster'][i]}</td>
14451442
<td>{df['HLA'][i]}</td>
14461443
<td>{df['Correlation'][i]}</td>
1447-
<td>{df['KLD'][i]}</td>
1444+
<td>{round(df['KLD'][i], 4) if df['KLD'][i] != 'NA' else 'NA'}</td>
14481445
</tr>
14491446
"""
14501447

@@ -1461,6 +1458,216 @@ def make_datatable_html(self, correlation_dict, df=None):
14611458

14621459
# return df.to_html(classes='table table-striped', index=False, table_id='correlation_table')
14631460

1461+
#!!! Best HLA View
1462+
1463+
def generate_best_matches_tab(self, highest_corr_per_row, threshold=0.7,
                              kld_lookup=None):
    """
    Build the HTML pane for the "Best Matched Results" tab.

    For each of the HLA-A, HLA-B and HLA-C loci, the two clusters with the
    highest correlation at or above ``threshold`` are rendered side by side
    as Bootstrap cards showing the Gibbs cluster logo and the reference logo.

    Args:
        highest_corr_per_row: Dictionary mapping cluster ID to a tuple of
            (hla_id, correlation). ``hla_id`` may look like ``HLA_A0101``,
            ``HLA-A0101`` or ``A0101``; entries whose ``hla_id`` is not a
            string, or whose allele does not start with A, B or C, are
            ignored.
        threshold: Minimum correlation value to consider (default: 0.7).
            NaN correlations are always ignored.
        kld_lookup: Optional callable ``(cluster_id, hla_name) -> number``
            that supplies the KLD shown in each card header. When it is
            omitted, or returns something that cannot be converted to a
            float, the header shows ``NA`` instead of a made-up number.

    Returns:
        HTML string for the Best Matched Results tab content.
    """
    # Bootstrap contextual colour used for each locus' card header.
    locus_colors = (('A', 'primary'), ('B', 'success'), ('C', 'info'))

    # Per locus: at most two (display_name, correlation, cluster_id) tuples,
    # strongest first.
    ranked = {locus: [] for locus, _ in locus_colors}

    for cluster_id, (hla_id, correlation) in highest_corr_per_row.items():
        # A non-string identifier cannot be parsed for its locus letter.
        if not isinstance(hla_id, str):
            continue

        # Strip an optional "HLA_" / "HLA-" prefix to get the bare allele.
        hla_name = hla_id
        for prefix in ('HLA_', 'HLA-'):
            if prefix in hla_id:
                hla_name = hla_id.split(prefix)[-1]
                break

        locus = hla_name[:1]
        # "not >=" instead of "<" so that NaN correlations are rejected too.
        if locus not in ranked or not correlation >= threshold:
            continue

        entry = (f"HLA-{hla_name}", correlation, cluster_id)
        top = ranked[locus]
        # Strict ">" keeps the earlier entry ahead when correlations tie.
        if not top or correlation > top[0][1]:
            top.insert(0, entry)
        elif len(top) < 2 or correlation > top[1][1]:
            top.insert(1, entry)
        del top[2:]

    def format_kld(cluster_id, hla_name):
        # Never print a fabricated value: fall back to 'NA' when no real
        # KLD is available.
        if kld_lookup is None:
            return 'NA'
        try:
            return f"{float(kld_lookup(cluster_id, hla_name)):.2f}"
        except (TypeError, ValueError):
            return 'NA'

    def image_paths(cluster_id, hla_name):
        # The replacements are done outside the f-strings: reusing the
        # enclosing quote character inside an f-string expression is a
        # SyntaxError before Python 3.12.
        cluster_stem = str(cluster_id).replace('.mat', '')
        allele_stem = str(hla_name).replace('.txt', '').replace('-', '_')
        return (f"cluster-img/gibbs_logos_{cluster_stem}-001.png",
                f"allotypes-img/{allele_stem}.png")

    def render_card(match, color):
        # One half-width card: header with the scores, then the two logos.
        hla_name, correlation, cluster_id = match
        kld = format_kld(cluster_id, hla_name)
        gibbs_img, ref_img = image_paths(cluster_id, hla_name)
        return f'''
        <div class="col-md-6">
            <div class="card mt-4 shadow-sm">
                <div class="card-header bg-{color} text-white">
                    <h5 class="card-title mb-0 text-center">{hla_name} (PCC: {correlation:.2f}, KLD: {kld})</h5>
                </div>
                <div class="card-body">
                    <div class="row mb-3">
                        <h5 class="mb-2">Gibbs Cluster</h5>
                        <div class="text-center">
                            <img src="{gibbs_img}" class="img-fluid border" alt="Gibbs Cluster" style="max-height: 300px;">
                        </div>
                    </div>
                    <div class="row">
                        <h5 class="mb-2">Reference HLA</h5>
                        <div class="text-center">
                            <img src="{ref_img}" class="img-fluid border" alt="Reference HLA" style="max-height: 300px;">
                        </div>
                    </div>
                </div>
            </div>
        </div>
        '''

    parts = ['<div class="tab-pane fade show active" id="beastresults" '
             'role="tabpanel" aria-labelledby="beastresults-tab">']

    for locus, color in locus_colors:
        matches = ranked[locus]
        if not matches:
            continue  # No match reached the threshold for this locus.

        parts.append(f'<!-- HLA-{locus} Row -->\n<div class="row">')
        parts.extend(render_card(match, color) for match in matches)
        parts.append('</div>\n')

    parts.append('</div>')
    return ''.join(parts)
1606+
1607+
def create_tab_views(self, highest_corr_per_row, all_results_html):
    """
    Create the complete tabbed interface for the comparison results.

    Three Bootstrap tabs are emitted: "Best Matched Results" (shown
    initially), "Gibbs KLD" (the saved ``corr-data/kld_pcc_distance.png``
    plot) and "All Results by Cluster".

    Args:
        highest_corr_per_row: Dictionary mapping cluster ID to tuple of
            (hla_id, correlation); forwarded to
            ``self.generate_best_matches_tab``.
        all_results_html: HTML content for the All Results by Cluster tab.

    Returns:
        String containing the complete HTML for the tabbed interface.
    """
    # The best-matches pane is rendered separately and already carries its
    # own "tab-pane ... show active" wrapper.
    best_matches_html = self.generate_best_matches_tab(highest_corr_per_row)

    # Exactly one nav button may be "active", and each button's
    # aria-controls must name the id of the pane it toggles.
    tabs_html = f'''

    <!-- Tabs navigation -->
    <ul class="nav nav-tabs" id="compareTabs" role="tablist">
        <li class="nav-item" role="presentation">
            <button class="nav-link active" id="beastresults-tab" data-bs-toggle="tab"
                data-bs-target="#beastresults" type="button" role="tab" aria-controls="beastresults"
                aria-selected="true">Best Matched Results</button>
        </li>
        <li class="nav-item" role="presentation">
            <button class="nav-link" id="gibbsresults-tab" data-bs-toggle="tab"
                data-bs-target="#gibbsresults" type="button" role="tab" aria-controls="gibbsresults"
                aria-selected="false">Gibbs KLD</button>
        </li>
        <li class="nav-item" role="presentation">
            <button class="nav-link" id="allresults-tab" data-bs-toggle="tab"
                data-bs-target="#allresults" type="button" role="tab" aria-controls="allresults"
                aria-selected="false">All Results by Cluster</button>
        </li>
    </ul>

    <div class="tab-content" id="compareTabsContent">
        <!-- Best Matched Results Tab -->
        {best_matches_html}

        <!-- Gibbs KLD Tab -->
        <div class="tab-pane fade" id="gibbsresults" role="tabpanel" aria-labelledby="gibbsresults-tab">
            <div class="col-md-12">
                <div class="card mt-4 shadow-sm">
                    <div class="card-body">
                        <div class="row">
                            <div class="text-center">
                                <img src="corr-data/kld_pcc_distance.png" class="img-fluid border" alt="KLD and PCC distance plot" style="max-height: 400px;">
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        <!-- All Results by Cluster Tab -->
        <div class="tab-pane fade" id="allresults" role="tabpanel" aria-labelledby="allresults-tab">
            {all_results_html}
        </div>
    </div>
    '''

    return tabs_html
1669+
1670+
14641671
#### NEW !!!!! Carousel Control based clusters #########
14651672

14661673

@@ -1520,14 +1727,20 @@ def render_cluster_carousels(self, highest_corr_per_row, gibbs_out):
15201727
Renders carousels for clusters, grouping them by their group number.
15211728
"""
15221729
cluster_hierarchy = self.create_cluster_hierarchy(highest_corr_per_row, gibbs_out)
1523-
cluster_html = """
1524-
<div class="container py-4">
1525-
<div class="card">
1526-
<div class="card-header bg-secondary text-white">
1527-
<h4 class="card-title mb-0">Gibbs Cluster and Reference Motif Comparisons</h4>
1528-
</div>
1529-
<div class="card-body">
1530-
"""
1730+
# cluster_html = """
1731+
# <div class="container py-4">
1732+
# <div class="card">
1733+
# <div class="card-header bg-secondary text-white">
1734+
# <h4 class="card-title mb-0">Gibbs Cluster and Reference Motif Comparisons</h4>
1735+
# </div>
1736+
# <div class="card-body">
1737+
# """
1738+
1739+
cluster_html = """
1740+
"""
1741+
1742+
1743+
15311744

15321745
for cluster_num in sorted(cluster_hierarchy.keys()):
15331746
if cluster_hierarchy[cluster_num]:
@@ -1672,7 +1885,7 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):
16721885
return carousel_html
16731886

16741887

1675-
def render_clustered_results(self, highest_corr_per_row, gibbs_out):
1888+
def render_clustered_results_nav(self, highest_corr_per_row, gibbs_out):
16761889
"""
16771890
Renders the complete HTML for all clustered results with carousels and necessary JavaScript.
16781891
"""
@@ -1687,7 +1900,15 @@ def render_clustered_results(self, highest_corr_per_row, gibbs_out):
16871900
16881901
16891902
"""
1690-
html_card += self.render_cluster_carousels(highest_corr_per_row, gibbs_out)
1903+
1904+
# cluster_hierarchy = self.create_cluster_hierarchy(highest_corr_per_row, gibbs_out)
1905+
1906+
all_results_html = self.render_cluster_carousels(highest_corr_per_row, gibbs_out)
1907+
1908+
1909+
# Add the carousel HTML to the card
1910+
html_card += self.create_tab_views(
1911+
highest_corr_per_row, all_results_html)
16911912

16921913
html_card += """"
16931914
</div>
@@ -2501,7 +2722,9 @@ def generate_html_layout(self, correlation_dict, db, gibbs_out, immunolyser=Fals
25012722
# print("##"*100)
25022723
# print(highest_corr_per_row)
25032724

2504-
new_html_carousel = self.render_cluster_carousels(
2725+
# new_html_carousel = self.render_cluster_carousels(
2726+
# highest_corr_per_row, gibbs_out)
2727+
new_html_carousel = self.render_clustered_results_nav(
25052728
highest_corr_per_row, gibbs_out)
25062729
body_end_1 += carousel_js
25072730
# print(new_html_carousel)

0 commit comments

Comments
 (0)