# Count experience rows per (persona, company) pair. Company ids are
# upper-cased first so differently-cased spellings collapse into one company.
gr_net = (
    df_full_personas_experiences_plus
    .with_columns(pl.col('company_id').str.to_uppercase())
    .group_by('persona_id', 'company_id')
    .agg(pl.len().alias('count'))
    .sort('count')
)

# gr_net holds one row per (persona, company) pair, so value_counts() on
# company_id counts the personas linked to each company. Keep the top five.
list_top_in_network = (
    gr_net['company_id']
    .value_counts()
    .sort('count', descending=True)['company_id']
    .to_list()[:5]
)
gr_net_f = gr_net.filter(pl.col('company_id').is_in(list_top_in_network))

# Short letter labels for the top companies, with lookups in both directions.
# zip() stops at the shorter input, so only as many letters as companies are used.
list_letters = ['A','B','C','D','E','F','G','H']
dict_company = dict(zip(list_letters, list_top_in_network))
dict_company_rev = {company: letter for letter, company in dict_company.items()}

# One row per persona holding the sorted, de-duplicated list of top companies
# that persona is linked to.
gr_gr_net_f = (
    gr_net_f
    .sort('company_id')
    .group_by('persona_id')
    .agg(pl.col('company_id').unique().sort())
)

# Share of personas ('per') for every distinct combination of top companies.
gr_gr_net_f2 = (
    gr_gr_net_f['company_id']
    .value_counts()
    .with_columns(
        (pl.col('count') / len(gr_gr_net_f)).alias('per')
    )
    .sort('per', descending=True)
)

# Render each combination as "P( A ∩ ¬B ∩ ...) = share": a letter is negated
# when its company is absent from the combination.
list_prob = []
for combination, per in gr_gr_net_f2.select('company_id', 'per').iter_rows():
    tmp_prob_letters = [
        f' {letter}' if company in combination else f'¬{letter}'
        for letter, company in dict_company.items()
    ]
    list_prob.append(f"P({' ∩ '.join(tmp_prob_letters)}) = {round(per, 4)}")

annon_prob_text = "<b>Probability Distribution:</b><br>" + '<br>'.join(list_prob)
# Build the persona-company graph: one undirected edge per (persona, company) row.
G = nx.Graph()
G.add_edges_from(gr_net_f.select(['persona_id', 'company_id']).iter_rows())

# Distinct node ids on each side of the graph.
persona_ids = gr_net_f['persona_id'].unique().to_list()
company_ids = gr_net_f['company_id'].unique().to_list()

# Node degree = number of connections; used below to scale marker sizes.
degree_dict = dict(G.degree())
company_degrees = [degree_dict[c] for c in company_ids]
persona_degrees = [degree_dict[p] for p in persona_ids]

# Degree bounds per node kind; fall back to 1 when there are no nodes.
min_company_degree = min(company_degrees, default=1)
max_company_degree = max(company_degrees, default=1)
min_persona_degree = min(persona_degrees, default=1)
max_persona_degree = max(persona_degrees, default=1)

# Marker size ranges (passed as min/max size when scaling node markers).
COMPANY_MIN_SIZE = 25
COMPANY_MAX_SIZE = 100
PERSONA_MIN_SIZE = 5
PERSONA_MAX_SIZE = 20

# Companies ordered from most to fewest connections.
company_ids_sorted = sorted(company_ids, key=degree_dict.__getitem__, reverse=True)

# Look up the current company among the plotted companies, ignoring case,
# and keep the spelling that is actually stored in company_ids.
HIGHLIGHTED_COMPANY = current_company_id
highlighted_key = HIGHLIGHTED_COMPANY.lower()
case_insensitive_matches = [c for c in company_ids if str(c).lower() == highlighted_key]
HIGHLIGHTED_COMPANY_EXISTS = bool(case_insensitive_matches)
# None means "nothing to highlight".
highlighted_company = case_insensitive_matches[0] if HIGHLIGHTED_COMPANY_EXISTS else None
# Create layout: companies sit on an outer circle, ordered by size.
pos = {}
num_companies = len(company_ids_sorted)
radius_outer = 2.0
start_angle = np.pi / 2  # 90°, i.e. the top of the circle

# Position companies on the circle, largest first.
for i, company in enumerate(company_ids_sorted):
    # The angle DECREASES with i, so placement runs clockwise:
    # largest at the top, the next ones towards the right, then the bottom,
    # then the left.
    angle = start_angle - (2 * np.pi * i / num_companies)
    # Polar -> cartesian.
    pos[company] = (radius_outer * np.cos(angle), radius_outer * np.sin(angle))

# Position personas inside the circle, pulled towards the companies they
# are connected to.
for persona in persona_ids:
    connected_companies = [c for c in company_ids if G.has_edge(persona, c)]
    if not connected_companies:
        pos[persona] = (0, 0)
        continue
    avg_x = np.mean([pos[c][0] for c in connected_companies])
    avg_y = np.mean([pos[c][1] for c in connected_companies])
    # Random jitter spreads out personas that share the same set of
    # companies; the layout therefore differs between runs unless the
    # NumPy global random state is seeded.
    jitter_x = np.random.uniform(-0.2, 0.2)
    jitter_y = np.random.uniform(-0.2, 0.2)
    # Halve the centroid so personas stay well inside the company circle.
    pos[persona] = (avg_x * 0.5 + jitter_x, avg_y * 0.5 + jitter_y)
# Flatten all edges into one coordinate list per axis; a None after each
# segment breaks the line so consecutive edges are not joined together.
edge_x, edge_y = [], []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

# A single faint line trace carries every edge; hovering over it is disabled.
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 0.6, 'color': 'rgba(120, 120, 120, 0.15)'},
)
# Per-node attribute lists, filled by the company and persona loops below.
# Company markers:
company_x = []
company_y = []
company_text = []
company_color = []
company_size = []
company_hover = []
company_border_width = []  # border thickness per company
company_border_color = []  # border colour per company
# Persona markers:
persona_x = []
persona_y = []
persona_color = []
persona_size = []
persona_hover = []
def scale_size(value, min_val, max_val, min_size, max_size):
    """Linearly map *value* from [min_val, max_val] onto [min_size, max_size].

    When the input range is degenerate (min_val == max_val) the midpoint of
    the size range is returned, which avoids a division by zero.
    """
    if max_val != min_val:
        fraction = (value - min_val) / (max_val - min_val)
        return min_size + fraction * (max_size - min_size)
    return (min_size + max_size) / 2
# Add COMPANY nodes in sorted order (largest first); rank is the 1-based
# position in that order.
for rank, company in enumerate(company_ids_sorted, start=1):
    x, y = pos[company]
    company_x.append(x)
    company_y.append(y)
    company_text.append(str(company))
    company_color.append('#EF553B')

    # Marker size grows linearly with the number of connected personas.
    connections = degree_dict[company]
    company_size.append(
        scale_size(
            connections,
            min_company_degree,
            max_company_degree,
            COMPANY_MIN_SIZE,
            COMPANY_MAX_SIZE,
        )
    )

    # The highlighted company gets a thicker border; the colour is always black.
    company_border_width.append(
        4 if highlighted_company and company == highlighted_company else 1
    )
    company_border_color.append('#000000')

    # Hover text: summary lines followed by up to five persona names.
    personas = gr_net_f.filter(pl.col('company_id') == company)['persona_id'].to_list()
    hover_parts = [
        f"<b>Company #{rank}:</b> {company}<br>",
        f"<b>Personas worked here:</b> {connections}<br>",
        f"<b>Connection rank:</b> {rank}/{len(company_ids_sorted)}<br>",
    ]
    if connections > 0:
        for persona in personas[:5]:
            persona_name = (
                df_all_personas
                .filter(pl.col('persona_id') == persona)['full_name'][0]
                .title()
            )
            hover_parts.append(f" • {persona_name}<br>")
        if connections > 5:
            hover_parts.append(f" • ... and {connections - 5} more")
    company_hover.append(''.join(hover_parts))
# Add PERSONA nodes.
#
# Both lookups are built once up front instead of filtering the dataframes
# again for every persona inside the loop.

# persona_id -> company ids, in gr_net_f row order.
companies_by_persona = {}
for persona_key, company_key in gr_net_f.select(['persona_id', 'company_id']).iter_rows():
    companies_by_persona.setdefault(persona_key, []).append(company_key)

# persona_id -> full_name, keeping the first row when an id occurs more than once.
persona_name_rows = (
    df_all_personas
    .filter(pl.col('persona_id').is_in(persona_ids))
    .unique(subset='persona_id', keep='first', maintain_order=True)
)
full_name_by_persona = dict(zip(
    persona_name_rows['persona_id'].to_list(),
    persona_name_rows['full_name'].to_list(),
))

for persona in persona_ids:
    x, y = pos[persona]
    persona_x.append(x)
    persona_y.append(y)
    persona_color.append('#636efa')

    # Marker size grows linearly with the number of connected companies.
    connections = degree_dict[persona]
    scaled_size = scale_size(
        connections,
        min_persona_degree,
        max_persona_degree,
        PERSONA_MIN_SIZE,
        PERSONA_MAX_SIZE,
    )
    persona_size.append(scaled_size)

    # Hover text.
    companies = companies_by_persona.get(persona, [])
    full_name = full_name_by_persona.get(persona)
    # Fall back to the raw id when the persona has no (or a null) name record,
    # so one incomplete record does not abort the whole figure.
    persona_name = full_name.title() if full_name is not None else str(persona)

    hover_text = f"<b>Persona:</b> {persona_name}<br>"
    hover_text += f"<b>Companies worked at:</b> {connections}<br>"
    if connections > 0:
        # Flag personas linked to the highlighted company.
        if highlighted_company and highlighted_company in companies:
            hover_text += f"<b>Worked at {highlighted_company}:</b> ✓<br>"
        hover_text += "<br>".join(f" • {comp}" for comp in companies[:5])
        if connections > 5:
            hover_text += f"<br> • ... and {connections - 5} more"
    persona_hover.append(hover_text)
# Company markers: labelled, individually sized, with per-node border styling.
company_trace = go.Scatter(
    x=company_x,
    y=company_y,
    mode='markers+text',
    text=company_text,
    textposition="top center",
    textfont={'size': 14, 'color': 'black'},
    hoverinfo='text',
    hovertext=company_hover,
    marker={
        'color': company_color,
        'size': company_size,
        'line': {
            'width': company_border_width,
            'color': company_border_color,
        },
        'opacity': 0.9,
    },
)

# Persona markers: unlabelled, details are available on hover only.
persona_trace = go.Scatter(
    x=persona_x,
    y=persona_y,
    mode='markers',
    text=None,  # no text labels for personas
    hoverinfo='text',
    hovertext=persona_hover,
    marker={
        'color': persona_color,
        'size': persona_size,
        'line': {'width': 1, 'color': 'black'},
        'opacity': 0.7,
    },
)
# Calculate axis ranges for a 1:1 aspect ratio.
all_positions = list(pos.values())
x_vals = [x for x, _ in all_positions]
y_vals = [y for _, y in all_positions]

# Bounding box of all nodes plus padding. default=0.0 keeps an empty layout
# (no nodes at all) from raising ValueError in min()/max().
padding = 0.5
x_range = [min(x_vals, default=0.0) - padding, max(x_vals, default=0.0) + padding]
y_range = [min(y_vals, default=0.0) - padding, max(y_vals, default=0.0) + padding]

# Expand the shorter axis around its centre so both axes span the same length.
max_range = max(x_range[1] - x_range[0], y_range[1] - y_range[0])
x_center = (x_range[0] + x_range[1]) / 2
y_center = (y_range[0] + y_range[1]) / 2
x_range = [x_center - max_range/2, x_center + max_range/2]
y_range = [y_center - max_range/2, y_center + max_range/2]

# Create the figure. Traces are drawn in list order, so edges end up at the
# bottom and company markers on top.
fig = go.Figure(
    data=[edge_trace, persona_trace, company_trace],
    layout=go.Layout(
        title=f'Persona-Company Network (Companies Ordered by Size)<br><sup>Highlighted: {highlighted_company if highlighted_company else "None"}</sup>',
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=20, r=20, t=100),
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            range=x_range,
            # Lock x to y so one data unit has the same length on both axes.
            scaleanchor="y",
            scaleratio=1,
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            range=y_range,
        ),
        plot_bgcolor='white',
        paper_bgcolor='white',
        width=900,
        height=900,
    ),
)
# Add legend with size examples and highlighting info
# legend_text = f"""
# <b>Node Size = Connection Count</b><br>
# <span style='color:#EF553B'>● Companies</span><br>
# <span style='color:#636efa'>● Personas</span> (hover for details)
# """
# fig.add_annotation(
# x=0.98, y=0.98,
# xref="paper", yref="paper",
# text=legend_text,
# showarrow=False,
# font=dict(size=14),
# align="left",
# bgcolor="rgba(255, 255, 255, 0.95)",
# )
# Add the top-companies list (at most ten entries) as a text box in the
# upper-left corner of the figure.
top_companies = company_ids_sorted[:10]
# Degree of the best-connected company; percentages below are relative to it.
top_connections = degree_dict[top_companies[0]] if top_companies else None

top_company_lines = []
for company in top_companies:
    connections = degree_dict[company]
    if not highlighted_company:
        # Nothing is highlighted: no percentage and no indicator.
        connections_per, highlight_indicator = "", ""
    elif company == highlighted_company:
        # The highlighted company gets the indicator instead of a percentage.
        connections_per, highlight_indicator = "", " "
    else:
        connections_per = f" | {round(connections/top_connections*100)}%"
        highlight_indicator = ""
    top_company_lines.append(
        f"{dict_company_rev[company]}. {company}: {connections} {connections_per} {highlight_indicator}<br>"
    )
top_companies_text = "<b>Top Companies by Connections:</b><br>" + "".join(top_company_lines)

fig.add_annotation(
    text=top_companies_text,
    xref="paper",
    yref="paper",
    x=0.02,
    y=0.98,
    showarrow=False,
    align="left",
    font={'size': 14},
    bgcolor="rgba(255, 255, 255, 0.9)",
)
# Add the probability list as a monospaced text box in the upper-right corner.
prob_annotation_font = {
    'family': "'Courier New', monospace",  # multiple fallbacks
    'size': 12,
    'color': "black",
}
fig.add_annotation(
    text=annon_prob_text,
    xref="paper",
    yref="paper",
    x=0.98,
    y=0.98,
    showarrow=False,
    align="left",
    font=prob_annotation_font,
    bgcolor="rgba(255, 255, 255, 0.95)",
)

# Export the figure as a WebP image named after the current company, then display it.
fig.write_image(path_output_images / f'network_{current_company_id}.webp')
fig.show()