Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 51 additions & 6 deletions isiscb/isisdata/isiscbviews/playground_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,36 +17,81 @@ def genealogy(request, tenant_id=None):
return render(request, 'isisdata/genealogy.html', context)

request = json.loads(request.body)
subjects = request['subjects']
selected_subjects = request['subjects']
domino_effect = request['domino']
node_ids = set(subjects.copy())
display_subjects = set(selected_subjects.copy())

# When users select subjects of type concept or geographic term,
# the process for producing the network graph data is different
# than that for people and institutions.

# get selected subject authority objects for concepts and places
concept_or_geographic_subject_authority_ids = Authority.objects.filter(
pk__in=selected_subjects,
type_controlled__in=[Authority.CONCEPT, Authority.GEOGRAPHIC_TERM]
)\
.values_list("id", flat=True)

# remove any concepts/places from the display subjects because we don't want them
# in the graph we want to create genealogies from, we want the people who write about them
display_subjects.difference_update(concept_or_geographic_subject_authority_ids)

node_ids = set(display_subjects.copy())

# get ACRs for citations that have selected concept/place as subject
concept_or_geographic_related_citation_ids = None
if concept_or_geographic_subject_authority_ids:
concept_or_geographic_related_citation_ids = ACRelation.objects.filter(
public=True,
citation__public=True,
type_controlled=ACRelation.SUBJECT,
authority__id__in=concept_or_geographic_subject_authority_ids,
)\
.values_list("citation__id", flat=True).distinct("citation__id")

# get the top authors of citations about that concept/place
concept_or_geographic_related_authors = None
if concept_or_geographic_related_citation_ids:
concept_or_geographic_related_authors = ACRelation.objects.filter(
public=True,
type_controlled=ACRelation.AUTHOR,
citation__id__in=[concept_or_geographic_related_citation_ids]
).values('authority__id')\
.annotate(author=Count('authority__id')).order_by('-author')\
.values_list("authority__id", flat=True)[:299]

# add those authors to the display subjects
if concept_or_geographic_related_authors:
display_subjects.update(concept_or_geographic_related_authors)

# fetch all ACRs of theses related to our display subjects
subject_theses_ids = ACRelation.objects.filter(
public=True,
authority__public=True,
citation__public=True,
authority__id__in=subjects,
authority__id__in=display_subjects,
citation__type_controlled=Citation.THESIS,
type_controlled__in=[ACRelation.SCHOOL, ACRelation.AUTHOR, ACRelation.ADVISOR]
)\
.values_list("citation__id", flat=True).distinct("citation__id")

# get the theses linked to those ACRs
subject_theses = Citation.objects.filter(id__in=[subject_theses_ids])

nodes = []
links = []

if subject_theses:
for thesis in subject_theses:
extrapolate_thesis(thesis, node_ids, links, domino_effect, subjects)
extrapolate_thesis(thesis, node_ids, links, domino_effect, display_subjects)

node_associations_min = 0
node_associations_max = 0

if node_ids:
node_authorities = Authority.objects.filter(pk__in=list(node_ids))
for authority in node_authorities:
node, node_association_count = generate_genealogy_node(authority, subjects)
node, node_association_count = generate_genealogy_node(authority, display_subjects)
node_associations_min = node_association_count if node_association_count < node_associations_min else node_associations_min
node_associations_max = node_association_count if node_association_count > node_associations_max else node_associations_max
nodes.append(node)
Expand All @@ -59,7 +104,7 @@ def genealogy(request, tenant_id=None):
context = {
'nodes': json.dumps(nodes),
'links': json.dumps(links),
'subjects': subjects,
'subjects': list(display_subjects),
'node_associations_range': node_associations_range,
}

Expand Down
110 changes: 76 additions & 34 deletions isiscb/isisdata/playground.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,23 +56,23 @@ def generate_theses_by_school_context(top, chart_type, select_schools):

def get_data_for_heatgrid(authority_ids, years, acrs):
"""
this function fetches and formats data in order to generate the heatgrid visualization.
this function takes a queryset of ACRs of theses
and converts them into a list of lists of the following form
(as desired by out-of-the-box D3 heatgrid graph (https://observablehq.com/@d3/the-impact-of-vaccines)):
This function fetches and formats data in order to generate the heatgrid visualization.
This function takes a queryset of ACRs of theses
and converts them into a list of lists of the following form
(as desired by out-of-the-box D3 heatgrid graph (https://observablehq.com/@d3/the-impact-of-vaccines)):

[
[<thesis-count-for-school1-year1>, <thesis-count-for-school1-year-2>, etc.],
[<thesis-count-for-school2-year1>, <thesis-count-for-school2-year-2>, etc.],
]
[
[<thesis-count-for-school1-year1>, <thesis-count-for-school1-year-2>, etc.],
[<thesis-count-for-school2-year1>, <thesis-count-for-school2-year-2>, etc.],
] ...

py:function:: get_data_for_heatgrid(authority_ids, years, acrs)

:param list authority_ids: a list of school ids
:param list years: range of years which will serve as the domain of the graph
:param queryset acrs: a queryset containing the ACRelations of each thesis for each of the schools
:return: formatted data necessary for populating D3.js heatgrid graphs
:rtype: list of lists
:return: formatted data necessary for populating D3.js heatgrid graphs as seen above
:rtype: list
"""
citations_count_per_year = []
years_counts_template = [0] * len(years)
Expand All @@ -90,24 +90,25 @@ def get_data_for_heatgrid(authority_ids, years, acrs):

def get_data_for_stacked_area(acrs, years, schools):
"""
this function fetches and formats data in order to generate the area visualizations (stacked, normalized, streamgraph).
this function takes a queryset of ACRs of theses
and converts them into a list of objects of the following form
(as desired by out-of-the-box D3 stacked area graphs):

{
"date": <date YYYY-MM-DD>,
"school": <str name-of-school>,
"theses": <int number-of-theses>
}
This function fetches and formats data in order to generate the area
visualizations (stacked, normalized, streamgraph).
This function takes a queryset of ACRs of theses
and converts them into a list of objects of the following form
(as desired by out-of-the-box D3 stacked area graphs):

{
"date": <date YYYY-MM-DD>,
"school": <str name-of-school>,
"theses": <int number-of-theses>
}

py:function:: get_data_for_stacked_area(acrs, years, schools)

:param date date: a year
:param str school: name of school that hosts theses
:param int theses: the number of theses produced at each school in each year
:return: formatted data necessary for populating D3.js area-type graphs
:rtype: list of dicts
:param queryset acrs: a queryset of thesis ACRs
:param list years: a list representing the date range for the data
:param list schools: a list of school names
:return: list of dicts of formatted data necessary for populating D3.js area-type graphs
:rtype: list
"""

schools_years = { school : years.copy() for school in schools }
Expand Down Expand Up @@ -154,6 +155,16 @@ def clean_dates(date_facet):
return new_date_facet

def get_ngram_data(authority_ids):
"""
Method for generating data necessary to produce ngram playground visualizations

:param list authority_ids: list of CBA IDs selected by user
:returns:
-ngrams (:py:class:`list`) - A list of objects containing a year and frequency count representing an ngram
-max_year (:py:class:`int`) - The latest year in the date range
-min_year (:py:class:`int`) - The earliest year in the date range
-max_frequency (:py:class:`int`) - The highest frequency count of any ngram graphed
"""
sqs_all = SearchQuerySet().models(Citation).auto_query('*').facet('publication_date')
all_facet_results = sqs_all.all().exclude(public="false")
all_pub_date_facet = all_facet_results.facet_counts()['fields']['publication_date'] if 'fields' in all_facet_results.facet_counts() else []
Expand Down Expand Up @@ -200,12 +211,12 @@ def generate_genealogy_link(source, target, thesis, link_type):

def generate_link_value():
if link_type == "alma_mater":
return next_year - thesis.publication_date.year if thesis.publication_date.year else 1
return next_year - thesis.publication_date.year if thesis.publication_date and thesis.publication_date.year else 1
else:
return 15

thesis_title = thesis.title if thesis.title else None
thesis_year = thesis.publication_date.year if thesis.publication_date.year else None
thesis_year = thesis.publication_date.year if thesis.publication_date and thesis.publication_date.year else None
thesis_id = thesis.id

link = {
Expand All @@ -224,11 +235,12 @@ def generate_genealogy_node(authority, subjects):
"""
Method to build a genealogy node.

FIXME: Paul please add details.
py:function:: generate_genealogy_node(authority, subjects)

Returns:
- node: a JSON object containing the node properties.
- node_association_counts: count of associtated theses of node
:param authority: a queryset object representing the authority (PERSON or INSTITUTION) that will form the node
:param list subjects: a list of CBA IDs representing the selected subjects
:return: a dict containing the metadata necessary to properly generate, label, and style a node
:rtype: dict
"""
theses_hosted_by_school = None
theses_advised = None
Expand All @@ -239,6 +251,7 @@ def generate_genealogy_node(authority, subjects):
thesis_title = ''
thesis_year = None
theses_advised_count = 0
advisor_name = ''

associated_theses = ACRelation.objects.filter(
public=True,
Expand All @@ -256,6 +269,14 @@ def generate_genealogy_node(authority, subjects):
authority__public=True,
authority__id=authority.id,
type_controlled=ACRelation.AUTHOR).values_list('citation__id', flat=True)

advisor_acr = ACRelation.objects.filter(
public=True,
citation__id__in=thesis_written,
type_controlled=ACRelation.ADVISOR).first()

if advisor_acr:
advisor_name = advisor_acr.authority.name

alma_mater_acr = ACRelation.objects.filter(
public=True,
Expand All @@ -265,7 +286,7 @@ def generate_genealogy_node(authority, subjects):
if alma_mater_acr:
alma_mater = alma_mater_acr.authority.name
thesis_title = alma_mater_acr.citation.title
thesis_year = alma_mater_acr.citation.publication_date.year
thesis_year = alma_mater_acr.citation.publication_date.year if alma_mater_acr.citation.publication_date else 0

theses_advised = associated_theses.filter(type_controlled=ACRelation.ADVISOR)
thesis_earliest = theses_advised.first().citation.publication_date.year if theses_advised and theses_advised.first().citation.publication_date else 0
Expand All @@ -279,8 +300,8 @@ def generate_genealogy_node(authority, subjects):
elif authority.type_controlled == Authority.INSTITUTION:
theses_hosted_by_school = associated_theses.count()
if theses_hosted_by_school:
thesis_earliest = associated_theses.first().citation.publication_date.year
thesis_latest = associated_theses.last().citation.publication_date.year
thesis_earliest = associated_theses.first().citation.publication_date.year if associated_theses.first().citation.publication_date else 0
thesis_latest = associated_theses.last().citation.publication_date.year if associated_theses.last().citation.publication_date else 0

node_associations_count = associated_theses.count()
node = {
Expand All @@ -292,6 +313,7 @@ def generate_genealogy_node(authority, subjects):
"theses_advised": theses_advised_count,
"employers": list(employers),
"alma_mater": alma_mater,
"advisor_name": advisor_name,
"thesis_title": thesis_title,
"thesis_year": thesis_year,
"thesis_earliest": thesis_earliest,
Expand All @@ -303,6 +325,26 @@ def generate_genealogy_node(authority, subjects):
return node, node_associations_count

def extrapolate_thesis(thesis, node_ids, links, domino_effect, subjects):
"""
Method to expand the family tree by extrapolating from relations.
For any given thesis, its advisor, author, and host school are added to the
masterlist of nodes, if not already present, and links connecting these
nodes are added to the masterlist of links, if not already present.
When the "domino chain reaction" toggle is activated, this method instantiates
nodes of upstream relations until a leaf is reached, not just instantiating the
nodes that are direct neighbors to the given thesis

py:function:: extrapolate_thesis(thesis, node_ids, links, domino_effect, subjects)

:param thesis: a queryset object representing a thesis to be extrapolated from
:param list node_ids: a list of CBA IDs for all nodes in the graph
:param list links: a list of dicts containing the metadata for all links in the graph
:param bool domino_effect: the result of user choice to activate domino_effect
:param list subjects: a list of CBA IDs representing the selected subjects
:return: None
"""

school = None
acrs = ACRelation.objects.filter(
public=True,
authority__public=True,
Expand Down
23 changes: 15 additions & 8 deletions isiscb/isisdata/templates/isisdata/genealogy.html
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
{% block content %}
<div>
<div class='col-sm-12 col-md-3' id="control-column" style="padding: 0;">
<span><b>Domino chain-reaction?</b></span>
<span><b>Domino chain-reaction? (warning: slower)</b></span>
<div class="btn-group btn-toggle" id="domino-button" style="margin-bottom: 15px;">
<button class="btn btn-xs btn-default">ON</button>
<button class="btn btn-xs btn-danger active">OFF</button>
Expand Down Expand Up @@ -88,9 +88,10 @@ <h5 style="margin-top: 0;">All Nodes</h5>
<div class="well well-lg" style="border-left: 5px solid #337ab7">
<h3>Using this exploratory tool</h3>
<h4>This is a tool for exploring how different scholars and schools in the database are directly and indirectly connected through genealogical networks of theses, advisors, advisees, and alma maters.</h4>
<p>The search bar finds people and schools to explore. </p>
<p>The <i class="fas fa-broom"></i> icon adds the person you selected and their advisor and alma mater and/or their advisees and employer to the graph or adds the school you selected and its students and their advisors to the graph.</p>
<p>Nodes represent people or schools; links represent the theses that bind them.</p>
<p>The search bar finds people, schools, concepts, and geographic terms to generate these genealogical networks. </p>
<p>If you search for people or schools, their directly related theses will be used to create the graph. If you search for a concept or geographic term, the graph will be generated from those scholars who have published work linked to that concept or place.</p>
<p>The <i class="fas fa-broom"></i> icon adds subjects to the graph.</p>
<p>Nodes represent advisers, dissertators, or schools; links represent the theses that bind them.</p>
<p>Hovering over a node in the graph displays the subject type and name and provides a link to the subject's Authority page.</p>
<p>Clicking a node "explodes" that node to reveal more relationships (allowing you to "follow the breadcrumb trail" from node to node through the data).</p>
<p>Clicking a linkage between nodes opens the citation page for that thesis in a new tab.</p>
Expand Down Expand Up @@ -123,7 +124,7 @@ <h4>This is a tool for exploring how different scholars and schools in the datab
var subjects = [];
var INITIAL_MAX_RESULTS = 10;
var max_results = INITIAL_MAX_RESULTS;
var types = ['PE','IN'];
var types = ['PE','IN', 'CO', 'GE'];
const typeMap = {
"CO": "Concept",
"TI": "Time Period",
Expand Down Expand Up @@ -453,7 +454,13 @@ <h4>This is a tool for exploring how different scholars and schools in the datab
extra = `has hosted ${d.theses_hosted_by_school} ${d.theses_hosted_by_school > 1 ? 'theses' : 'thesis'} between ${d.thesis_earliest} & ${d.thesis_latest}<br>`;
} else if (d.type == 'PE') {
if (d.alma_mater != '') {
extra = `completed their thesis at ${d.alma_mater} in ${d.thesis_year}<br>`;
extra = `completed their thesis at ${d.alma_mater} `;
}
if (d.advisor_name != '') {
extra += `under the supervision of ${d.advisor_name} `
}
if (d.thesis_year) {
extra += `in ${d.thesis_year}<br>`
}
if (d.theses_advised > 0) {
extra += `has supervised ${d.theses_advised} ${d.theses_advised > 1 ? 'theses' : 'thesis'} between ${d.thesis_earliest} & ${d.thesis_latest}<br>`
Expand Down Expand Up @@ -533,7 +540,7 @@ <h4>This is a tool for exploring how different scholars and schools in the datab
"PU": "#fff"
}

const nodeSizeScale = d3.scaleLog([node_associations_range.min + 1 , node_associations_range.max], [5, 15])
const nodeSizeScale = d3.scaleLog([node_associations_range.min + 1 , node_associations_range.max], [5, 12])

let typesPresent = [];
nodes.map(node => {
Expand Down Expand Up @@ -633,7 +640,7 @@ <h4>This is a tool for exploring how different scholars and schools in the datab
//takes the link value (number of citations connected to both nodes of the link) and scales it to the range specified to serve as the link strength
let linkForceScale = d3.scaleLinear()
.domain([Math.min(...linkValues), Math.max(...linkValues)])
.range([.5, 1.5]);
.range([.3, 1]);

//takes the link value (number of citations connected to both nodes of the link) and scales it to the range specified to serve as the link thickness
/*
Expand Down