This commit is contained in:
2025-09-16 11:09:45 +02:00
parent 5e73e03486
commit ef968f8121

241
f.py
View File

@@ -1,241 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simulation du processus de création de liens entre pages web sous l'influence de moteurs de recherche,
conformément à l'énoncé "Modélisations mathématiques — BUT3 Info Fontainebleau".
Fonctionnalités :
- Initialisation d'un graphe orienté de n pages avec pertinence p(x) dans [0,1]
- Scores de recherche au choix : "indegree_plus_one" (degré entrant + 1, normalisé) ou "pagerank"
- À chaque itération : on choisit x au hasard, puis y selon la distribution des scores s(y),
puis on ajoute le lien x->y avec probabilité p(y)
- Politiques : interdiction des boucles x->x, politique de remplacement optionnelle
- Arrêt : quand chaque page a au moins ceil(ln n) liens sortants OU stagnation
- Visualisation : graphe final (taille des nœuds = score), historique des nouveaux liens
- Export CSV et PNG
"""
import numpy as np
import random
import math
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from dataclasses import dataclass
from typing import Optional, Tuple, Dict, List
# ----------------------------
# Configuration de simulation
# ----------------------------
@dataclass
class WebSimulationConfig:
    """Parameters controlling one run of the web link-creation simulation."""

    # Number of pages (nodes) in the directed graph.
    n: int = 30
    # Seed applied to both `random` and `numpy.random`; None leaves them unseeded.
    seed: Optional[int] = 42
    # How search scores are computed: "indegree_plus_one" | "pagerank".
    score_mode: str = "indegree_plus_one"
    # True: draw relevance p(x) from Beta(*beta_params); False: uniform on [0, 1).
    use_realistic_p: bool = False
    # (a, b) shape parameters of the Beta distribution used when use_realistic_p.
    beta_params: Tuple[float, float] = (2.0, 5.0)
    # Hard cap on the number of simulation iterations.
    max_iters: int = 20000
    # Out-links every page must reach before stopping; None means ceil(ln n).
    min_out_links: Optional[int] = None
    # Number of consecutive iterations without any change that triggers a stop.
    stagnation_patience: int = 500
    # Whether a page may link to itself (x -> x).
    allow_self_loops: bool = False
    # Link replacement policy: "none" | "replace_worst_if_better".
    replacement_policy: str = "none"
    # Parameters forwarded to networkx.pagerank when score_mode == "pagerank".
    pagerank_damping: float = 0.85
    pagerank_tol: float = 1.0e-08
    pagerank_max_iter: int = 100
# ----------------------------
# Simulation
# ----------------------------
class WebSimulation:
    """Simulate link creation between web pages driven by search scores.

    At each iteration a source page x is drawn uniformly, a target page y is
    drawn according to the current score distribution, and the link x -> y is
    accepted with probability p(y), the relevance of y.

    Attributes:
        cfg: The configuration the simulation was built from.
        G: Directed graph whose nodes are the integers 0..n-1.
        p: Relevance of every page, in [0, 1].
        scores: Current search score of every page (sums to 1).
        min_out_links: Out-degree every page must reach for the run to stop.
    """

    def __init__(self, config: WebSimulationConfig):
        """Build an edgeless graph and draw the page relevances.

        Note: when ``config.seed`` is not None this seeds the *global*
        ``random`` and ``numpy.random`` generators.

        Raises:
            ValueError: if ``config.n`` is smaller than 1.
        """
        self.cfg = config
        # Fail early with a clear message instead of a late, opaque error
        # (math.log(0) "math domain error" or randrange(0) inside run()).
        if self.cfg.n < 1:
            raise ValueError("n must be at least 1")
        if self.cfg.seed is not None:
            np.random.seed(self.cfg.seed)
            random.seed(self.cfg.seed)

        self.G = nx.DiGraph()
        self.G.add_nodes_from(range(self.cfg.n))

        if self.cfg.use_realistic_p:
            a, b = self.cfg.beta_params
            p_vals = np.random.beta(a, b, size=self.cfg.n)
        else:
            p_vals = np.random.rand(self.cfg.n)
        self.p = {i: float(p_vals[i]) for i in range(self.cfg.n)}

        # With no edges yet, every page starts with the same score.
        self.scores = {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}

        if self.cfg.min_out_links is None:
            self.min_out_links = math.ceil(math.log(self.cfg.n))
        else:
            self.min_out_links = self.cfg.min_out_links

    def _compute_scores_indegree_plus_one(self) -> Dict[int, float]:
        """Return (in-degree + 1) for every page, normalised to sum to 1."""
        raw = np.array(
            [self.G.in_degree(i) + 1 for i in range(self.cfg.n)], dtype=float
        )
        # Every entry is >= 1, so the total is strictly positive whenever
        # there is at least one page; no zero-division guard is needed.
        total = raw.sum()
        return {i: float(raw[i] / total) for i in range(self.cfg.n)}

    def _compute_scores_pagerank(self) -> Dict[int, float]:
        """Return PageRank scores, or uniform scores if iteration fails."""
        try:
            pr = nx.pagerank(
                self.G,
                alpha=self.cfg.pagerank_damping,
                tol=self.cfg.pagerank_tol,
                max_iter=self.cfg.pagerank_max_iter,
            )
        except nx.PowerIterationFailedConvergence:
            # Best-effort fallback: keep the simulation running with a
            # uniform distribution rather than aborting.
            pr = {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}
        return pr

    def recompute_scores(self) -> None:
        """Refresh ``self.scores`` according to ``cfg.score_mode``.

        Raises:
            ValueError: if ``cfg.score_mode`` is not a known mode.
        """
        if self.cfg.score_mode == "indegree_plus_one":
            self.scores = self._compute_scores_indegree_plus_one()
        elif self.cfg.score_mode == "pagerank":
            self.scores = self._compute_scores_pagerank()
        else:
            raise ValueError("Unknown score_mode")

    def _choose_y_weighted_by_scores(self) -> int:
        """Draw a target page with probability proportional to its score."""
        nodes = list(range(self.cfg.n))
        weights = np.array([self.scores[i] for i in nodes], dtype=float)
        # Renormalise so rounding drift never makes np.random.choice reject p.
        weights = weights / weights.sum()
        return int(np.random.choice(nodes, p=weights))

    def _maybe_add_edge(self, x: int, y: int) -> bool:
        """Try to create the link x -> y; return True if the graph changed.

        The link is considered only with probability p(y). What happens next
        depends on ``cfg.replacement_policy``:

        - "none": the edge is added if it does not exist yet.
        - "replace_worst_if_better": the edge is added while x has fewer than
          ``min_out_links`` out-links; afterwards it replaces x's least
          relevant out-link, but only if y is strictly more relevant.

        Raises:
            ValueError: if the replacement policy is unknown (only reached
                once a candidate link has been accepted).
        """
        if x == y and not self.cfg.allow_self_loops:
            return False

        # random.random() lies in [0, 1): the strict comparison accepts with
        # probability exactly p(y) and never accepts a page with p(y) == 0.
        if not random.random() < self.p[y]:
            return False

        policy = self.cfg.replacement_policy

        if policy == "none":
            if self.G.has_edge(x, y):
                return False
            self.G.add_edge(x, y, weight=1.0)
            return True

        if policy == "replace_worst_if_better":
            if self.G.has_edge(x, y):
                return False
            out_neighbors = list(self.G.successors(x))
            # Still below quota (or nothing to replace): simply add the link.
            if len(out_neighbors) < self.min_out_links or not out_neighbors:
                self.G.add_edge(x, y, weight=1.0)
                return True
            worst = min(out_neighbors, key=lambda t: self.p[t])
            if self.p[y] > self.p[worst]:
                self.G.remove_edge(x, worst)
                self.G.add_edge(x, y, weight=1.0)
                return True
            return False

        raise ValueError("Unknown replacement_policy")

    def _meets_stopping_condition(self, stagnation_steps: int) -> bool:
        """Return True when every page has enough out-links or on stagnation."""
        if all(
            self.G.out_degree(i) >= self.min_out_links
            for i in range(self.cfg.n)
        ):
            return True
        return stagnation_steps >= self.cfg.stagnation_patience

    def run(self):
        """Run the simulation until a stop condition or ``cfg.max_iters``.

        Returns:
            A tuple ``(G, p, scores, history_new_edges, iters)`` where
            ``history_new_edges[k]`` is 1 if iteration k changed the graph
            and 0 otherwise, and ``iters`` is the number of iterations run.
        """
        history_new_edges = []
        stagnation = 0
        iters = 0
        while iters < self.cfg.max_iters:
            x = random.randrange(self.cfg.n)
            y = self._choose_y_weighted_by_scores()
            changed = self._maybe_add_edge(x, y)
            history_new_edges.append(1 if changed else 0)
            if changed:
                stagnation = 0
                # Scores depend on the graph, so refresh them after a change.
                self.recompute_scores()
            else:
                stagnation += 1
            iters += 1
            if self._meets_stopping_condition(stagnation):
                break
        # Make sure the returned scores match the final graph even when the
        # loop never changed anything.
        self.recompute_scores()
        return self.G, self.p, self.scores, history_new_edges, iters
# ----------------------------
# Outils d'analyse & tracés
# ----------------------------
def summarize(G, p, scores) -> pd.DataFrame:
    """Build a per-page summary table sorted by score, then by relevance.

    Args:
        G: Graph exposing ``nodes()``, ``in_degree(i)`` and ``out_degree(i)``.
        p: Mapping node -> relevance.
        scores: Mapping node -> search score.

    Returns:
        A DataFrame with columns ``node``, ``pertinence_p``, ``score``,
        ``in_degree`` and ``out_degree``, sorted by descending score and
        descending relevance, with a fresh 0..k-1 index. An empty graph
        yields an empty frame with those same columns.
    """
    columns = ["node", "pertinence_p", "score", "in_degree", "out_degree"]
    rows = [
        {
            "node": i,
            "pertinence_p": p[i],
            "score": scores[i],
            "in_degree": G.in_degree(i),
            "out_degree": G.out_degree(i),
        }
        for i in G.nodes()
    ]
    # Explicit columns keep sort_values from raising KeyError when the graph
    # has no nodes (a frame built from an empty list has no columns at all).
    table = pd.DataFrame(rows, columns=columns)
    table = table.sort_values(["score", "pertinence_p"], ascending=[False, False])
    return table.reset_index(drop=True)
def plot_expected_graph(G, scores, title="Graphe attendu (taille = score)",
                        save_path: Optional[str] = None, show: bool = True,
                        dpi: int = 150):
    """Draw the graph on a circle with node sizes proportional to scores.

    Args:
        G: The networkx graph to draw.
        scores: Mapping node -> score, used to size the nodes.
        title: Figure title.
        save_path: If given, the figure is written to this path *before* it
            is shown, so the saved image is never blank.
        show: If True, display the figure with ``plt.show()``; if False the
            figure is closed once drawn (and saved).
        dpi: Resolution used when saving.
    """
    # Strict circular layout.
    pos = nx.circular_layout(G)

    # Node size proportional to score, relative to the largest score.
    score_vals = np.array([scores[i] for i in G.nodes()], dtype=float)
    peak = score_vals.max() if score_vals.size else 0.0
    if peak <= 0:
        # Avoid dividing by zero when there are no nodes or all scores are 0.
        peak = 1.0
    node_sizes = 3000 * (score_vals / peak + 0.05)

    fig = plt.figure(figsize=(7, 7))
    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color="dodgerblue")
    nx.draw_networkx_edges(
        G, pos, arrows=True, arrowstyle="-|>", arrowsize=8,
        edge_color="black", width=0.8
    )
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()

    if save_path is not None:
        fig.savefig(save_path, dpi=dpi, bbox_inches="tight")
    if show:
        plt.show()
    else:
        # Not displayed: release the figure instead of leaking it.
        plt.close(fig)


def plot_history(history, save_path: Optional[str] = None, show: bool = True,
                 dpi: int = 150):
    """Plot, per iteration, whether the graph changed (1) or not (0).

    Args:
        history: Sequence of 0/1 flags, one per iteration.
        save_path: If given, the figure is written to this path before it is
            shown.
        show: If True, display the figure; if False, close it once drawn.
        dpi: Resolution used when saving.
    """
    fig = plt.figure(figsize=(10, 3))
    plt.plot(history)
    plt.title("Nouveaux liens (1) vs itérations")
    plt.xlabel("Itération")
    plt.ylabel("Nouveau lien ?")
    plt.tight_layout()

    if save_path is not None:
        fig.savefig(save_path, dpi=dpi, bbox_inches="tight")
    if show:
        plt.show()
    else:
        plt.close(fig)


# ----------------------------
# Main
# ----------------------------
def main():
    """Run a sample simulation, print a summary, show and export the results."""
    cfg = WebSimulationConfig(
        n=30,
        seed=123,
        score_mode="indegree_plus_one",  # or "pagerank"
        use_realistic_p=True,
        beta_params=(2.5, 4.0),
        max_iters=10000,
        min_out_links=None,  # -> ceil(ln n)
        stagnation_patience=1000,
        allow_self_loops=False,
        replacement_policy="replace_worst_if_better",
    )
    sim = WebSimulation(cfg)
    G, p, scores, history, iters = sim.run()
    print(f"Itérations effectuées: {iters}")
    df = summarize(G, p, scores)
    print(df.head(10))

    # Figures (interactive display).
    plot_expected_graph(G, scores)
    plot_history(history)

    # Exports. The figures are saved by the plotting helpers themselves:
    # calling plt.savefig() after plt.show() would write a blank image,
    # because the shown figure is no longer the current one.
    df.to_csv("resume_pages.csv", index=False)
    plot_expected_graph(
        G, scores, title="Graphe final exporté",
        save_path="graphe_final.png", show=False,
    )
    plot_history(history, save_path="historique_liens.png", show=False)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()