#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simulate how links appear between web pages under the influence of search engines.

Follows the assignment "Modélisations mathématiques — BUT3 Info Fontainebleau".

Features:
- Initialise a directed graph of n pages, each with a relevance p(x) in [0, 1].
- Search scores, selectable: "indegree_plus_one" (in-degree + 1, normalised)
  or "pagerank".
- At each iteration: pick x uniformly at random, pick y according to the score
  distribution s(y), then add the link x->y with probability p(y).
- Policies: self-loops x->x can be forbidden; optional replacement policy.
- Stop when every page has at least ceil(ln n) outgoing links OR on stagnation.
- Visualisation: final graph (node size = score), history of new links.
- CSV and PNG export.
"""

import math
import random
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

# ----------------------------
# Simulation configuration
# ----------------------------


@dataclass
class WebSimulationConfig:
    """Parameters controlling one run of :class:`WebSimulation`."""

    # Number of pages (graph nodes); must be >= 1.
    n: int = 30
    # Seed applied to both `numpy.random` and `random`; None leaves them unseeded.
    seed: Optional[int] = 42
    score_mode: str = "indegree_plus_one"  # "indegree_plus_one" | "pagerank"
    # When True, relevances are drawn from Beta(*beta_params) instead of U[0, 1).
    use_realistic_p: bool = False
    beta_params: Tuple[float, float] = (2.0, 5.0)
    # Hard cap on the number of iterations.
    max_iters: int = 20000
    # Target out-degree per page; None means ceil(ln n).
    min_out_links: Optional[int] = None
    # Number of consecutive iterations without any change that triggers a stop.
    stagnation_patience: int = 500
    allow_self_loops: bool = False
    replacement_policy: str = "none"  # "none" | "replace_worst_if_better"
    # Parameters forwarded to `networkx.pagerank`.
    pagerank_damping: float = 0.85
    pagerank_tol: float = 1.0e-08
    pagerank_max_iter: int = 100


# ----------------------------
# Simulation
# ----------------------------


class WebSimulation:
    """Random link-creation process driven by search scores and page relevance."""

    def __init__(self, config: WebSimulationConfig):
        """Build the empty graph, draw the relevances and set uniform scores.

        Raises:
            ValueError: if ``config.n`` is smaller than 1.
        """
        self.cfg = config
        # Fail early with a clear message instead of the "math domain error"
        # that math.log(0) would raise further down.
        if self.cfg.n < 1:
            raise ValueError("n must be a positive integer")
        if self.cfg.seed is not None:
            # NOTE: this seeds the *global* generators of numpy and random.
            np.random.seed(self.cfg.seed)
            random.seed(self.cfg.seed)
        self.G = nx.DiGraph()
        self.G.add_nodes_from(range(self.cfg.n))
        if self.cfg.use_realistic_p:
            a, b = self.cfg.beta_params
            p_vals = np.random.beta(a, b, size=self.cfg.n)
        else:
            p_vals = np.random.rand(self.cfg.n)
        # Relevance p(x) of every page.
        self.p = {i: float(p_vals[i]) for i in range(self.cfg.n)}
        # Scores start uniform: no page has any incoming link yet.
        self.scores = {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}
        if self.cfg.min_out_links is None:
            self.min_out_links = math.ceil(math.log(self.cfg.n))
        else:
            self.min_out_links = self.cfg.min_out_links

    def _uniform_scores(self) -> Dict[int, float]:
        """Return the uniform distribution over all pages."""
        return {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}

    def _compute_scores_indegree_plus_one(self) -> Dict[int, float]:
        """Return scores proportional to (in-degree + 1), normalised to sum to 1."""
        raw = np.array(
            [self.G.in_degree(i) + 1 for i in range(self.cfg.n)], dtype=float
        )
        total = raw.sum()
        if total <= 0:
            return self._uniform_scores()
        return {i: float(raw[i] / total) for i in range(self.cfg.n)}

    def _compute_scores_pagerank(self) -> Dict[int, float]:
        """Return PageRank scores, or uniform scores if power iteration fails."""
        try:
            return nx.pagerank(
                self.G,
                alpha=self.cfg.pagerank_damping,
                tol=self.cfg.pagerank_tol,
                max_iter=self.cfg.pagerank_max_iter,
            )
        except nx.PowerIterationFailedConvergence:
            return self._uniform_scores()

    def recompute_scores(self) -> None:
        """Refresh ``self.scores`` according to ``cfg.score_mode``.

        Raises:
            ValueError: if ``cfg.score_mode`` is not a known mode.
        """
        if self.cfg.score_mode == "indegree_plus_one":
            self.scores = self._compute_scores_indegree_plus_one()
        elif self.cfg.score_mode == "pagerank":
            self.scores = self._compute_scores_pagerank()
        else:
            raise ValueError("Unknown score_mode")

    def _choose_y_weighted_by_scores(self) -> int:
        """Draw a target page with probability proportional to its score."""
        weights = np.array(
            [self.scores[i] for i in range(self.cfg.n)], dtype=float
        )
        total = weights.sum()
        if total > 0:
            weights = weights / total
        else:
            # Degenerate scores: fall back to a uniform draw rather than
            # dividing by zero and handing NaNs to np.random.choice.
            weights = np.full(self.cfg.n, 1.0 / self.cfg.n)
        return int(np.random.choice(self.cfg.n, p=weights))

    def _maybe_add_edge(self, x: int, y: int) -> bool:
        """Try to add the link x->y; return True if the graph changed.

        The link is accepted with probability ``p[y]``. Under the
        "replace_worst_if_better" policy, a page that already has
        ``min_out_links`` outgoing links swaps its least relevant target for
        ``y`` when ``y`` is strictly more relevant.

        Raises:
            ValueError: if ``cfg.replacement_policy`` is not a known policy.
        """
        if (not self.cfg.allow_self_loops) and (x == y):
            return False
        # random.random() lies in [0, 1): a strict comparison accepts with
        # probability p[y] and never accepts a page whose relevance is 0.
        if not random.random() < self.p[y]:
            return False

        policy = self.cfg.replacement_policy
        if policy == "none":
            if self.G.has_edge(x, y):
                return False
            self.G.add_edge(x, y, weight=1.0)
            return True

        if policy == "replace_worst_if_better":
            if self.G.has_edge(x, y):
                return False
            targets = list(self.G.successors(x))
            # Still below quota (or nothing to replace): simply add the link.
            if len(targets) < self.min_out_links or not targets:
                self.G.add_edge(x, y, weight=1.0)
                return True
            worst = min(targets, key=lambda t: self.p[t])
            if self.p[y] > self.p[worst]:
                self.G.remove_edge(x, worst)
                self.G.add_edge(x, y, weight=1.0)
                return True
            return False

        raise ValueError("Unknown replacement_policy")

    def _meets_stopping_condition(self, stagnation_steps: int) -> bool:
        """Return True when all pages reached the quota or the run stagnates."""
        if all(
            self.G.out_degree(i) >= self.min_out_links
            for i in range(self.cfg.n)
        ):
            return True
        return stagnation_steps >= self.cfg.stagnation_patience

    def run(self) -> Tuple[nx.DiGraph, Dict[int, float], Dict[int, float], List[int], int]:
        """Run the simulation until a stopping condition or ``max_iters``.

        Returns:
            ``(G, p, scores, history, iters)`` where ``history[k]`` is 1 if
            iteration ``k`` changed the graph (link added or replaced) and 0
            otherwise, and ``iters`` is the number of iterations performed.
        """
        history_new_edges: List[int] = []
        stagnation = 0
        iters = 0
        while iters < self.cfg.max_iters:
            x = random.randrange(self.cfg.n)
            y = self._choose_y_weighted_by_scores()
            changed = self._maybe_add_edge(x, y)
            history_new_edges.append(1 if changed else 0)
            if changed:
                stagnation = 0
                # Scores depend on the graph, so refresh them after each change.
                self.recompute_scores()
            else:
                stagnation += 1
            iters += 1
            if self._meets_stopping_condition(stagnation):
                break
        self.recompute_scores()
        return self.G, self.p, self.scores, history_new_edges, iters


# ----------------------------
# Analysis & plotting helpers
# ----------------------------


def summarize(G, p, scores) -> pd.DataFrame:
    """Return one row per node, sorted by score then relevance (descending)."""
    columns = ["node", "pertinence_p", "score", "in_degree", "out_degree"]
    rows = [
        {
            "node": i,
            "pertinence_p": p[i],
            "score": scores[i],
            "in_degree": G.in_degree(i),
            "out_degree": G.out_degree(i),
        }
        for i in G.nodes()
    ]
    # Passing the columns explicitly keeps the sort valid for an empty graph.
    frame = pd.DataFrame(rows, columns=columns)
    return frame.sort_values(
        ["score", "pertinence_p"], ascending=[False, False]
    ).reset_index(drop=True)


def plot_expected_graph(G, scores, title="Graphe attendu (taille = score)",
                        show=True, save_path=None):
    """Draw the graph on a circle with node sizes proportional to the scores.

    Args:
        G: directed graph to draw.
        scores: mapping node -> score, used for the node sizes.
        title: figure title.
        show: when True, display the figure with ``plt.show()``; when False
            the figure is closed once drawn (and saved, if requested).
        save_path: optional file path; when given the figure is written there
            *before* it is shown, so the saved image is never blank.
    """
    # Strict circular layout.
    pos = nx.circular_layout(G)

    # Node sizes proportional to the scores (relative to the largest one).
    score_vals = np.array([scores[i] for i in G.nodes()], dtype=float)
    peak = score_vals.max() if score_vals.size else 1.0
    if peak <= 0:
        peak = 1.0
    node_sizes = 3000 * (score_vals / peak + 0.05)

    fig = plt.figure(figsize=(7, 7))
    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color="dodgerblue")
    nx.draw_networkx_edges(
        G, pos, arrows=True, arrowstyle="-|>", arrowsize=8,
        edge_color="black", width=0.8
    )
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    if save_path is not None:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
    if show:
        plt.show()
    else:
        plt.close(fig)


def plot_history(history, show=True, save_path=None):
    """Plot, per iteration, whether the graph changed (1) or not (0).

    Args:
        history: sequence of 0/1 flags, one per iteration.
        show: when True, display the figure with ``plt.show()``; when False
            the figure is closed once drawn (and saved, if requested).
        save_path: optional file path; when given the figure is written there
            before it is shown.
    """
    fig = plt.figure(figsize=(10, 3))
    plt.plot(history)
    plt.title("Nouveaux liens (1) vs itérations")
    plt.xlabel("Itération")
    plt.ylabel("Nouveau lien ?")
    plt.tight_layout()
    if save_path is not None:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
    if show:
        plt.show()
    else:
        plt.close(fig)


# ----------------------------
# Main
# ----------------------------


def main():
    """Run a demo simulation, display the figures and export CSV/PNG files."""
    cfg = WebSimulationConfig(
        n=30,
        seed=123,
        score_mode="indegree_plus_one",  # or "pagerank"
        use_realistic_p=True,
        beta_params=(2.5, 4.0),
        max_iters=10000,
        min_out_links=None,  # -> ceil(ln n)
        stagnation_patience=1000,
        allow_self_loops=False,
        replacement_policy="replace_worst_if_better",
    )
    sim = WebSimulation(cfg)
    G, p, scores, history, iters = sim.run()
    print(f"Itérations effectuées: {iters}")
    df = summarize(G, p, scores)
    print(df.head(10))
    # Figures
    plot_expected_graph(G, scores)
    plot_history(history)
    # Exports: save straight from the freshly drawn figures without showing
    # them again, so the PNG files contain the actual plots.
    df.to_csv("resume_pages.csv", index=False)
    plot_expected_graph(
        G, scores, title="Graphe final exporté",
        show=False, save_path="graphe_final.png",
    )
    plot_history(history, show=False, save_path="historique_liens.png")


if __name__ == "__main__":
    main()