Commit 1caf97ad authored by Ian Dennis Miller

anonymous graphml, proper suffix on api endpoints, louvain community...

anonymous graphml, proper suffix on api endpoints, louvain community detection, community phrases, community tags
parent b556ed78
Pipeline #262 passed with stage
in 18 seconds
......@@ -4,3 +4,4 @@
/build/
/dist/
/*.egg-info/
downloader.log
......@@ -3,7 +3,6 @@ from flask import Flask, jsonify, Response
from flasgger import Swagger
from rdflib.plugins.sleepycat import Sleepycat
from rdflib.store import NO_STORE, VALID_STORE
from .snapshot import Snapshot
......@@ -16,7 +15,7 @@ store = Sleepycat()
snapshot = Snapshot(screen_name, store=store)
@app.route('/leaders/<screen_name>/')
@app.route('/leaders/<screen_name>-leaders.json')
def leaders(screen_name):
"""Endpoint returning a list of accounts followed by account specified.
---
......@@ -45,7 +44,30 @@ def leaders(screen_name):
return jsonify(snapshot.query.leaders(screen_name))
@app.route('/followers/<screen_name>/')
# GET /leaders.json -- JSON-encodes whatever snapshot.query.all_leaders() returns.
# NOTE(review): the docstring below is presumably consumed by flasgger as the
# Swagger spec (flasgger is imported by this module), so treat its text as
# runtime data. Its `Leaders` schema documents an array of strings and the
# `rgb` example looks like template residue, while the summary speaks of
# "leaders edges" -- confirm the schema against the real payload.
@app.route('/leaders.json')
def all_leaders():
"""Endpoint returning all leaders edges.
---
definitions:
Screen_name:
type: string
Leaders:
type: array
items:
$ref: '#/definitions/Screen_name'
responses:
200:
description: A list of leaders
schema:
$ref: '#/definitions/Leaders'
examples:
rgb: ['red', 'green', 'blue']
"""
return jsonify(snapshot.query.all_leaders())
@app.route('/followers/<screen_name>-followers.json')
def followers(screen_name):
"""Endpoint returning a list of accounts that follow account specified.
---
......@@ -74,16 +96,10 @@ def followers(screen_name):
return jsonify(snapshot.query.followers(screen_name))
@app.route('/net/<screen_name>-ego.graphml')
def ego_net(screen_name):
@app.route('/net/social-network.graphml')
def ego_net():
"""Endpoint returning a graphml representation of a user's ego net.
---
parameters:
- name: screen_name
in: path
type: string
required: true
default: all
definitions:
Screen_name:
type: string
......@@ -99,7 +115,26 @@ def ego_net(screen_name):
return(Response(graphml, mimetype="text/xml"))
@app.route('/name/<screen_name>/')
# GET /net/social-network-anonymous.graphml -- responds with the output of
# snapshot.transformation.produce_graphml(anonymous=True), served as text/xml.
# NOTE(review): the docstring summary still reads "a user's ego net", but this
# route takes no screen_name and asks for the anonymous variant; the wording
# looks copied from ego_net and should probably be reworded. The docstring is
# presumably parsed by flasgger, so it is left untouched here.
@app.route('/net/social-network-anonymous.graphml')
def ego_net_anonymous():
"""Endpoint returning a graphml representation of a user's ego net.
---
definitions:
Screen_name:
type: string
produces:
- text/xml
responses:
200:
description: A graphml ego network
schema:
type: file
"""
# anonymous=True is forwarded unchanged to the transformation layer.
graphml = snapshot.transformation.produce_graphml(anonymous=True)
return(Response(graphml, mimetype="text/xml"))
@app.route('/name/<screen_name>.json')
def name(screen_name):
"""Endpoint returning name of account specified.
---
......@@ -125,7 +160,7 @@ def name(screen_name):
return jsonify(snapshot.query.name(screen_name))
@app.route('/description/<screen_name>/')
@app.route('/description/<screen_name>-description.json')
def description(screen_name):
"""Endpoint returning description of account specified.
---
......@@ -152,7 +187,7 @@ def description(screen_name):
return jsonify(snapshot.query.description(screen_name))
@app.route('/descriptions/')
@app.route('/descriptions.json')
def descriptions():
"""Endpoint returning description of account specified.
---
......@@ -171,3 +206,24 @@ def descriptions():
"""
return jsonify(snapshot.query.all_descriptions())
# GET /community-tags.json -- JSON-encodes snapshot.transformation.produce_tags().
# NOTE(review): the `Descriptions` definition, the "Account description"
# response text and the example below look copied from the descriptions
# endpoint. produce_tags() appears to return a mapping of community id to a
# list of bigram pairs, so the documented schema probably does not match the
# payload -- confirm and update. The docstring is presumably parsed by
# flasgger, so it is left untouched here.
@app.route('/community-tags.json')
def community_tags():
"""Endpoint returning phrases for each community
---
definitions:
Descriptions:
type: array
items:
type: string
responses:
200:
description: Account description
schema:
$ref: '#/definitions/Descriptions'
examples:
['This is an account description', 'This is an account description']
"""
return jsonify(snapshot.transformation.produce_tags())
......@@ -45,15 +45,14 @@ class Query:
def all_descriptions(self):
    """Return every row produced by the ``all_descriptions.sparql`` query.

    The template is rendered without arguments, executed against
    ``self.snapshot.g`` and the result rows are materialised into a list.

    Rows are returned whole rather than reduced to their first column:
    ``get_community_keywords`` unpacks each row as ``(name, description)``.

    Returns:
        list: one entry per result row, in the order the query yields them.
    """
    sparql = self.env.get_template('all_descriptions.sparql').render()
    results = self.snapshot.g.query(sparql)
    return list(results)
def all_leaders(self):
    """Return every row produced by the ``all_leaders.sparql`` query.

    The rows are copied into a plain list instead of handing back the raw
    query result, because callers both iterate the value and pass it to
    ``jsonify``.

    Returns:
        list: one entry per result row, in the order the query yields them.
    """
    sparql = self.env.get_template('all_leaders.sparql').render()
    results = self.snapshot.g.query(sparql)
    return list(results)
def all_followers(self):
    """Return every row produced by the ``all_followers.sparql`` query.

    The rows are copied into a plain list so the value can be iterated more
    than once and matches what ``all_leaders`` returns.

    Returns:
        list: one entry per result row, in the order the query yields them.
    """
    sparql = self.env.get_template('all_followers.sparql').render()
    results = self.snapshot.g.query(sparql)
    return list(results)
# One row per subject that has both a screen name and a description.
SELECT ?name ?description
WHERE {
    ?sub twitter:screen_name ?name .
    ?sub twitter:description ?description .
}
......@@ -2,34 +2,145 @@
from io import BytesIO
import networkx as nx
import community
from collections import defaultdict
from nltk.collocations import BigramCollocationFinder
import nltk
def remove_nodes_below_degree(graph, threshold=2):
    """Return a copy of *graph* without its low-degree nodes.

    A node is dropped when its in-degree plus out-degree is less than or
    equal to *threshold*. Degrees are measured once, on the input graph, so
    this is a single pass: nodes that remain may end up at or below the
    threshold in the returned graph because their neighbours were removed.

    Args:
        graph: directed graph exposing ``in_degree``, ``out_degree``,
            ``nodes``, ``copy`` and ``remove_node``; it is not modified.
        threshold: largest total degree that is still removed. Defaults
            to 2, the value that used to be hard-coded.

    Returns:
        The pruned copy of *graph*.
    """
    degree_in = graph.in_degree(graph)
    degree_out = graph.out_degree(graph)
    pruned = graph.copy()
    # Iterate the untouched input graph so removals never disturb iteration.
    for node in graph.nodes():
        if degree_in[node] + degree_out[node] <= threshold:
            pruned.remove_node(node)
    return pruned
def detect_communities(graph):
    """Label every node with a community number and return the labelled graph.

    The graph is converted with ``to_undirected()`` before it is handed to
    ``community.best_partition``. The labelled graph that is returned is that
    undirected copy, not the graph passed in.

    Each distinct partition id is renumbered 1..k in the iteration order of
    the set of ids, and the number is stored under the node attribute
    ``"community"``.

    Args:
        graph: graph to partition; it is not modified.

    Returns:
        The undirected copy of *graph* with a ``"community"`` attribute on
        every node present in the partition.
    """
    undirected = graph.to_undirected()
    partition = community.best_partition(undirected)
    # Build the id -> 1-based label table once instead of rescanning the
    # whole partition for every community.
    labels = {
        com: number
        for number, com in enumerate(set(partition.values()), start=1)
    }
    for node, com in partition.items():
        undirected.node[node]["community"] = labels[com]
    return undirected
def get_all_user_edges(snapshot):
    """Collect all follower and leader rows as lower-cased two-item lists.

    Follower rows are emitted with their two columns swapped, leader rows
    keep their column order. Follower edges come first in the result,
    followed by leader edges.
    """
    query = snapshot.query
    edges = []
    for row in query.all_followers():
        # Swap the columns so follower rows point the same way as leader rows.
        edges.append([row[1].lower(), row[0].lower()])
    for row in query.all_leaders():
        edges.append([row[0].lower(), row[1].lower()])
    return edges
def build_user_graph(edges):
    """Build a directed graph from *edges*, using ``str()`` of each name as node id.

    Every item of *edges* must unpack into exactly two names; the edge runs
    from the first to the second.
    """
    graph = nx.DiGraph()
    # Adding an edge also creates its endpoints, first the source and then
    # the target, so separate add_node calls are not needed.
    graph.add_edges_from((str(source), str(target)) for source, target in edges)
    return graph
def build_anonymous_graph(edges):
    """Build a directed graph from *edges* with names replaced by integer ids.

    Ids are assigned sequentially from 0 in order of first appearance and
    are used, stringified, as node identifiers. *edges* is traversed twice,
    so it must be re-iterable.
    """
    # Pass 1: number every distinct name; len(ids) is always the next free id.
    ids = {}
    for source, target in edges:
        for name in (source, target):
            ids.setdefault(name, len(ids))

    # Pass 2: rebuild the edges over the stringified ids; add_edge creates
    # the source node and then the target node when they are missing.
    graph = nx.DiGraph()
    for source, target in edges:
        graph.add_edge(str(ids[source]), str(ids[target]))
    return graph
def create_graphml(graph):
    """Serialise *graph* with ``nx.write_graphml`` and return the bytes written."""
    with BytesIO() as buffer:
        nx.write_graphml(graph, buffer)
        # Read the contents before the buffer is closed on leaving the block.
        return buffer.getvalue()
def build_social_network(snapshot, anonymous):
    """Assemble the community-labelled social network for *snapshot*.

    Steps: fetch all user edges, build either the anonymous or the named
    graph depending on *anonymous*, prune nodes with
    ``remove_nodes_below_degree``, then label communities with
    ``detect_communities``.
    """
    edges = get_all_user_edges(snapshot)
    make_graph = build_anonymous_graph if anonymous else build_user_graph
    pruned = remove_nodes_below_degree(make_graph(edges))
    return detect_communities(pruned)
def get_community_keywords(snapshot, graph):
    """Gather the account descriptions of each community into one string.

    Descriptions come from ``snapshot.query.all_descriptions()``, whose rows
    are unpacked as ``(name, description)`` and lower-cased. Every node of
    *graph* contributes the description stored under its own id to the
    community given by its ``"community"`` node attribute.

    Descriptions are joined with a single space so the last word of one
    account is not fused with the first word of the next. Nodes without a
    description (or with an empty one) contribute nothing instead of
    raising ``KeyError``.

    Args:
        snapshot: object exposing ``query.all_descriptions()``.
        graph: graph exposing ``nodes()`` and the ``node`` attribute mapping.

    Returns:
        defaultdict(str): community id -> space-joined description text. A
        community whose nodes have no descriptions maps to ``""``.
    """
    descriptions = {
        name.lower(): description.lower()
        for name, description in snapshot.query.all_descriptions()
    }

    texts_by_community = defaultdict(list)
    for node in graph.nodes():
        # Touch the bucket first so every community gets a key, even when
        # none of its nodes has a description.
        bucket = texts_by_community[graph.node[node]["community"]]
        text = descriptions.get(node)
        if text:
            bucket.append(text)

    community_phrases = defaultdict(str)
    for com, texts in texts_by_community.items():
        community_phrases[com] = " ".join(texts)
    return community_phrases
def find_tags(community_phrases, max_tags=15):
    """Extract the top-scoring word pairs for each community's text.

    Each value of *community_phrases* is tokenised with
    ``nltk.word_tokenize`` and adjacent word pairs (``window_size=2``) are
    ranked by likelihood ratio. Pairs containing a word shorter than three
    characters or an English stopword are discarded. No frequency filter is
    applied, so a pair seen only once is still a candidate.

    Args:
        community_phrases: mapping of community id -> text.
        max_tags: maximum number of pairs kept per community. Defaults to
            15, the value that used to be hard-coded.

    Returns:
        dict: community id -> list of the best word pairs for that community.
    """
    if not community_phrases:
        # Nothing to score; skip loading the stopword corpus altogether.
        return {}

    # These do not depend on the community, so build them once. A set makes
    # the per-word stopword check constant-time.
    measure = nltk.collocations.BigramAssocMeasures().likelihood_ratio
    ignored_words = set(nltk.corpus.stopwords.words('english'))

    def is_ignored(word):
        return len(word) < 3 or word in ignored_words

    result = {}
    for idx, phrase in community_phrases.items():
        tokens = nltk.word_tokenize(phrase)
        finder = BigramCollocationFinder.from_words(tokens, window_size=2)
        finder.apply_word_filter(is_ignored)
        result[idx] = finder.nbest(measure, max_tags)
    return result
class Transformation:
def __init__(self, snapshot):
self.snapshot = snapshot
def produce_graphml(self, anonymous=False):
    """Return the snapshot's social network serialised as GraphML.

    The graph is built by ``build_social_network`` and serialised by
    ``create_graphml``; this method only wires the two together. The
    earlier inline implementation of the same name was shadowed by this
    definition and has been removed.

    Args:
        anonymous: forwarded to ``build_social_network``; when true the
            anonymous graph variant is built instead of the named one.

    Returns:
        Whatever ``create_graphml`` returns for the built graph.
    """
    graph = build_social_network(self.snapshot, anonymous=anonymous)
    return create_graphml(graph)
def produce_keywords(self):
    """Return the per-community description text for the named network.

    Builds the non-anonymous social network for ``self.snapshot`` and passes
    it, together with the snapshot, to ``get_community_keywords``.
    """
    network = build_social_network(self.snapshot, anonymous=False)
    keywords = get_community_keywords(self.snapshot, network)
    return keywords
def produce_tags(self):
    """Return ``find_tags`` applied to the output of ``produce_keywords``."""
    return find_tags(self.produce_keywords())
def produce_status_list(self, screen_name=None):
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment