NOTE(review): this patch was reconstructed from a copy whose line breaks and
indentation had been collapsed. Indentation in both files and the position of
blank context lines in the stats.c hunks are inferred - check the stats.c
hunks against the target file before applying. The "index" hashes are kept
from the original patch and do not describe the edited contents.

diff --git a/f.py b/f.py
new file mode 100644
index 0000000..730ed10
--- /dev/null
+++ b/f.py
@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Simulation of link creation between web pages under the influence of search
+engines, following the assignment "Modélisations mathématiques — BUT3 Info
+Fontainebleau".
+
+Features:
+- Initialise a directed graph of n pages, each with a relevance p(x) in [0, 1]
+- Search scores, selectable: "indegree_plus_one" (in-degree + 1, normalised)
+  or "pagerank"
+- At each iteration: pick x at random, pick y from the score distribution s(y),
+  then add the link x->y with probability p(y)
+- Policies: self-loops x->x forbidden, optional replacement policy
+- Stop: when every page has at least ceil(ln n) outgoing links OR on stagnation
+- Visualisation: final graph (node size = score), history of new links
+- CSV and PNG export
+"""
+
+import numpy as np
+import random
+import math
+import pandas as pd
+import networkx as nx
+import matplotlib.pyplot as plt
+from dataclasses import dataclass
+from typing import Optional, Tuple, Dict, List
+
+# ----------------------------
+# Simulation configuration
+# ----------------------------
+
+@dataclass
+class WebSimulationConfig:
+    """Parameters of one simulation run."""
+
+    n: int = 30
+    seed: Optional[int] = 42
+    score_mode: str = "indegree_plus_one"  # "indegree_plus_one" | "pagerank"
+    use_realistic_p: bool = False  # True: draw p from Beta(*beta_params)
+    beta_params: Tuple[float, float] = (2.0, 5.0)
+    max_iters: int = 20000
+    min_out_links: Optional[int] = None  # None -> ceil(ln n)
+    stagnation_patience: int = 500
+    allow_self_loops: bool = False
+    replacement_policy: str = "none"  # "none" | "replace_worst_if_better"
+    pagerank_damping: float = 0.85
+    pagerank_tol: float = 1.0e-08
+    pagerank_max_iter: int = 100
+
+# ----------------------------
+# Simulation
+# ----------------------------
+
+class WebSimulation:
+    """Random link-creation process on a directed graph of cfg.n pages."""
+
+    def __init__(self, config: WebSimulationConfig):
+        """Optionally seed the RNGs, build the edgeless graph, draw relevances."""
+        self.cfg = config
+        if self.cfg.seed is not None:
+            np.random.seed(self.cfg.seed)
+            random.seed(self.cfg.seed)
+        self.G = nx.DiGraph()
+        self.G.add_nodes_from(range(self.cfg.n))
+        if self.cfg.use_realistic_p:
+            a, b = self.cfg.beta_params
+            p_vals = np.random.beta(a, b, size=self.cfg.n)
+        else:
+            p_vals = np.random.rand(self.cfg.n)
+        self.p = {i: float(p_vals[i]) for i in range(self.cfg.n)}
+        self.scores = {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}
+        self.min_out_links = (math.ceil(math.log(self.cfg.n)) if self.cfg.min_out_links is None
+                              else self.cfg.min_out_links)
+
+    def _compute_scores_indegree_plus_one(self) -> Dict[int, float]:
+        """Return (in-degree + 1) for every node, normalised to sum to 1."""
+        raw = np.array([self.G.in_degree(i) + 1 for i in range(self.cfg.n)], dtype=float)
+        total = raw.sum()
+        if total <= 0:
+            return {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}
+        return {i: float(raw[i] / total) for i in range(self.cfg.n)}
+
+    def _compute_scores_pagerank(self) -> Dict[int, float]:
+        """Return PageRank scores, or uniform scores if it fails to converge."""
+        try:
+            pr = nx.pagerank(self.G, alpha=self.cfg.pagerank_damping,
+                             tol=self.cfg.pagerank_tol,
+                             max_iter=self.cfg.pagerank_max_iter)
+        except nx.PowerIterationFailedConvergence:
+            pr = {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}
+        return pr
+
+    def recompute_scores(self) -> None:
+        """Refresh self.scores according to cfg.score_mode."""
+        if self.cfg.score_mode == "indegree_plus_one":
+            self.scores = self._compute_scores_indegree_plus_one()
+        elif self.cfg.score_mode == "pagerank":
+            self.scores = self._compute_scores_pagerank()
+        else:
+            raise ValueError("Unknown score_mode")
+
+    def _choose_y_weighted_by_scores(self) -> int:
+        """Draw a target node with probability proportional to its score."""
+        nodes = list(range(self.cfg.n))
+        weights = np.array([self.scores[i] for i in nodes], dtype=float)
+        weights = weights / weights.sum()
+        return int(np.random.choice(nodes, p=weights))
+
+    def _maybe_add_edge(self, x: int, y: int) -> bool:
+        """Try to create x->y; return True only if the graph was modified."""
+        if (not self.cfg.allow_self_loops) and (x == y):
+            return False
+        # The link is accepted with probability p(y)
+        accept = (random.random() <= self.p[y])
+        if not accept:
+            return False
+
+        if self.cfg.replacement_policy == "none":
+            if not self.G.has_edge(x, y):
+                self.G.add_edge(x, y, weight=1.0)
+                return True
+            return False
+
+        if self.cfg.replacement_policy == "replace_worst_if_better":
+            out_neighbors = list(self.G.successors(x))
+            if y in out_neighbors:
+                return False
+            if len(out_neighbors) < self.min_out_links:
+                self.G.add_edge(x, y, weight=1.0)
+                return True
+            # x already has min_out_links successors: y may only replace the least relevant one
+            worst = min(out_neighbors, key=lambda t: self.p[t]) if out_neighbors else None
+            if worst is None:
+                self.G.add_edge(x, y, weight=1.0)
+                return True
+            if self.p[y] > self.p[worst]:
+                self.G.remove_edge(x, worst)
+                self.G.add_edge(x, y, weight=1.0)
+                return True
+            return False
+
+        raise ValueError("Unknown replacement_policy")
+
+    def _meets_stopping_condition(self, stagnation_steps: int) -> bool:
+        """Return True when all out-degrees reach min_out_links or on stagnation."""
+        if all(self.G.out_degree(i) >= self.min_out_links for i in range(self.cfg.n)):
+            return True
+        if stagnation_steps >= self.cfg.stagnation_patience:
+            return True
+        return False
+
+    def run(self):
+        """Run the simulation.
+
+        Returns (G, p, scores, history_new_edges, iters), where
+        history_new_edges holds 1 for each iteration that changed the graph
+        and 0 otherwise.
+        """
+        history_new_edges = []
+        stagnation = 0
+        iters = 0
+        while iters < self.cfg.max_iters:
+            x = random.randrange(self.cfg.n)
+            y = self._choose_y_weighted_by_scores()
+            changed = self._maybe_add_edge(x, y)
+            history_new_edges.append(1 if changed else 0)
+            if changed:
+                stagnation = 0
+                self.recompute_scores()
+            else:
+                stagnation += 1
+            iters += 1
+            if self._meets_stopping_condition(stagnation):
+                break
+        self.recompute_scores()
+        return self.G, self.p, self.scores, history_new_edges, iters
+
+# ----------------------------
+# Analysis & plotting tools
+# ----------------------------
+
+def summarize(G, p, scores) -> pd.DataFrame:
+    """Return one row per node, sorted by score then relevance (descending)."""
+    rows = []
+    for i in G.nodes():
+        rows.append({
+            "node": i,
+            "pertinence_p": p[i],
+            "score": scores[i],
+            "in_degree": G.in_degree(i),
+            "out_degree": G.out_degree(i),
+        })
+    return pd.DataFrame(rows).sort_values(["score", "pertinence_p"], ascending=[False, False]).reset_index(drop=True)
+
+def plot_expected_graph(G, scores, title="Graphe attendu (taille = score)",
+                        save_path=None, show=True):
+    """Draw the graph on a circle, node size growing with the score.
+
+    If save_path is given, the figure is written there (150 dpi) before it is
+    shown: saving after plt.show() can export a new, empty figure.
+    With show=False the figure is closed instead of displayed.
+    """
+    # Strict circular layout
+    pos = nx.circular_layout(G)
+
+    # Node size proportional to the scores
+    score_vals = np.array([scores[i] for i in G.nodes()])
+    node_sizes = 3000 * (score_vals / score_vals.max() + 0.05)
+
+    fig = plt.figure(figsize=(7, 7))
+    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color="dodgerblue")
+    nx.draw_networkx_edges(
+        G, pos, arrows=True, arrowstyle="-|>", arrowsize=8,
+        edge_color="black", width=0.8
+    )
+    plt.axis("off")
+    plt.title(title)
+    plt.tight_layout()
+    if save_path is not None:
+        fig.savefig(save_path, dpi=150, bbox_inches="tight")
+    if show:
+        plt.show()
+    else:
+        plt.close(fig)
+
+
+
+def plot_history(history, save_path=None, show=True):
+    """Plot the 0/1 "new link" indicator against the iteration index.
+
+    If save_path is given, the figure is saved (150 dpi) before being shown;
+    with show=False the figure is closed instead of displayed.
+    """
+    fig = plt.figure(figsize=(10, 3))
+    plt.plot(history)
+    plt.title("Nouveaux liens (1) vs itérations")
+    plt.xlabel("Itération")
+    plt.ylabel("Nouveau lien ?")
+    plt.tight_layout()
+    if save_path is not None:
+        fig.savefig(save_path, dpi=150, bbox_inches="tight")
+    if show:
+        plt.show()
+    else:
+        plt.close(fig)
+
+# ----------------------------
+# Main
+# ----------------------------
+
+def main():
+    """Run one simulation, print a summary, show the figures and export them."""
+    cfg = WebSimulationConfig(
+        n=30,
+        seed=123,
+        score_mode="indegree_plus_one",  # or "pagerank"
+        use_realistic_p=True,
+        beta_params=(2.5, 4.0),
+        max_iters=10000,
+        min_out_links=None,  # -> ceil(ln n)
+        stagnation_patience=1000,
+        allow_self_loops=False,
+        replacement_policy="replace_worst_if_better",
+    )
+    sim = WebSimulation(cfg)
+    G, p, scores, history, iters = sim.run()
+    print(f"Itérations effectuées: {iters}")
+    df = summarize(G, p, scores)
+    print(df.head(10))
+    # Figures
+    plot_expected_graph(G, scores)
+    plot_history(history)
+    # Exports
+    df.to_csv("resume_pages.csv", index=False)
+    plot_expected_graph(G, scores, title="Graphe final exporté",
+                        save_path="graphe_final.png", show=False)
+    plot_history(history, save_path="historique_liens.png", show=False)
+
+if __name__ == "__main__":
+    main()
diff --git a/ping-report/src/stats.c b/ping-report/src/stats.c
index e37fbe8..cb951b9 100644
--- a/ping-report/src/stats.c
+++ b/ping-report/src/stats.c
@@ -22,130 +22,109 @@
     Return value :
         Ping value as a string or NULL if an error occured
 */
-void whil(FILE* fd,
-    char* read_line,
-    size_t n ,
-    size_t nmatch,
-    regex_t *p_reg,
+
+/* Helper: return a newly allocated "<ping>\n" string built from capture
+   group 1 of p_reg in line, or NULL when there is no match or malloc fails.
+   The returned string is malloc'd; the caller is responsible for it. */
+static char* extract_ping_from_line(
+    const char* line,
+    regex_t* p_reg,
     regmatch_t* pmatch,
-    char* ping,
-    int start,
-    int end,
-    size_t size_ping){
+    size_t nmatch
+){
+    int start, end;
+    size_t size_ping;
+    char* ping = NULL;
 
-
-    while(getline(&read_line,&n,fd) != -1){
-
-        if(read_line == NULL){
-            break;
-        }
-
-        /* Exec regex to find ping */
-
-        if(regexec(p_reg,read_line,nmatch,pmatch,0) == 0){
-
-            /* Extract ping position from read line */
-            start = (int) pmatch[1].rm_so;
-            end = (int) pmatch[1].rm_eo;
-            size_ping = (size_t) (end - start);
-
-            /* ping string memory allocation */
-            ping = malloc(sizeof(char) * (size_ping+2));
-            if(ping == NULL){
-                free(read_line);
-                read_line = NULL;
-                n = 0;
-                break;
-            }
-
-            /* Create ping string */
-            (void) strncpy(ping, &read_line[start], size_ping);
-            ping[size_ping]='\n';
-            ping[size_ping+1]='\0';
-
-            /* Free memory */
-            free(read_line);
-            read_line = NULL;
-            n = 0;
-            break;
-        }
-
-        free(read_line);
-        read_line = NULL;
-        n = 0;
+    if(regexec(p_reg, line, nmatch, pmatch, 0) != 0){
+        return NULL; /* no match */
     }
-}
 
-/*@null@*/char* get_ping_from_temp_log(){
-    /* Variables */
+    /* Extract ping position from line */
+    start = (int) pmatch[1].rm_so;
+    end = (int) pmatch[1].rm_eo;
+    size_ping = (size_t)(end - start);
+
+    /* Allocate ping string */
+    ping = malloc(sizeof(char) * (size_ping + 2));
+    if(ping == NULL){
+        return NULL;
+    }
+
+    /* Copy substring */
+    (void) strncpy(ping, &line[start], size_ping);
+    ping[size_ping] = '\n';
+    ping[size_ping+1] = '\0';
+
+    return ping;
+}
+
+
+char* get_ping_from_temp_log(){
+    FILE* fd = NULL;
     char* read_line = NULL;
     size_t n = 0;
     size_t nmatch = 2;
-    regex_t *p_reg;
-    regmatch_t* pmatch;
+    regex_t* p_reg = NULL;
+    regmatch_t* pmatch = NULL;
     char* ping = NULL;
-    int start;
-    int end;
-    size_t size_ping;
 
-    /* regex struct memory allocation */
-    p_reg = (regex_t *) malloc(sizeof(*p_reg));
+    /* regex struct allocation */
+    p_reg = malloc(sizeof(*p_reg));
     if(p_reg == NULL){
-        return ping; /* NULL */
+        return NULL;
     }
 
-    /* Open ping log file */
     fd = fopen("/var/log/ping-report/last-ping.log","r");
     if(fd == NULL){
         free(p_reg);
-        return ping; /* NULL */
+        return NULL;
     }
 
-    /* Construct regex to get ping from log file */
-    if(regcomp(p_reg,"time=(.*) ms",REG_EXTENDED) != 0){
-        if(p_reg != NULL){
-            free(p_reg);
-        }
-        (void) fclose(fd);
-        return ping; /* NULL */
+    if(regcomp(p_reg, "time=(.*) ms", REG_EXTENDED) != 0){
+        free(p_reg);
+        fclose(fd);
+        return NULL;
     }
 
-    /* match info memory allocation */
     pmatch = malloc(sizeof(*pmatch) * nmatch);
     if(pmatch == NULL){
-        (void) fclose(fd);
         regfree(p_reg);
         free(p_reg);
-        return ping; /* NULL */
+        fclose(fd);
+        return NULL;
     }
 
-    /* Read file */
-    whil(FILE* fd,
-        char* read_line,
-        size_t n ,
-        size_t nmatch,
-        regex_t *p_reg,
-        regmatch_t* pmatch,
-        char* ping,
-        int start,
-        int end,
-        size_t size_ping);
+    while(getline(&read_line, &n, fd) != -1){
+        if(read_line == NULL){
+            break;
+        }
 
-    /* free allocated memory */
+        ping = extract_ping_from_line(read_line, p_reg, pmatch, nmatch);
+
+        free(read_line);
+        read_line = NULL;
+        n = 0;
+
+        if(ping != NULL){
+            break; /* stop when ping found */
+        }
+    }
+
+    /* Cleanup */
     regfree(p_reg);
     free(p_reg);
     free(pmatch);
     if(read_line != NULL){
         free(read_line);
     }
+    fclose(fd);
 
-    (void) fclose(fd);
-
-    /* ping may be null, then it must mean that the ping request was lost */
-    return ping;
+    return ping; /* May be NULL if not found */
 }
 
 
+
 /*
     -- write_ping_log --
     Desc :
@@ -194,15 +173,52 @@ void write_ping_log(char* new_ping){
     Return value :
         None
 */
+
+/* Helper: fold one log line into the running statistics. A line starting
+   with "LOSS" increments *nb_loss; a value below 0.1 is ignored; any other
+   value updates *nb_ping, *max, *min, *nb_high (> 100.0) and *sum. */
+static void process_ping_line(
+    const char* line,
+    double* sum,
+    double* max,
+    double* min,
+    int* nb_high,
+    int* nb_loss,
+    int* nb_ping
+){
+    double ping;
+
+    /* Lines come from getline(), which keeps the '\n': match the prefix */
+    if(strncmp(line,"LOSS",4) == 0){
+        (*nb_loss)++;
+        return;
+    }
+
+    /* Evaluate the ping as a double */
+    ping = strtod(line,NULL);
+    if(ping < 0.1){
+        return; /* Ignore null ping */
+    }
+
+    (*nb_ping)++;
+
+    if(ping > *max){
+        *max = ping;
+    }
+    if(ping < *min){
+        *min = ping;
+    }
+    if(ping > 100.0){
+        (*nb_high)++;
+    }
+    *sum += ping;
+}
+
 void set_stats_ping(){
-    /* Variables */
-    FILE* fd;
 
-    /* Open log file */
-    fd = fopen("/var/log/ping-report/all-ping.log","r");
+    FILE* fd = fopen("/var/log/ping-report/all-ping.log","r");
 
     if(fd != NULL){
-        /* Stats variables */
         double ping = 0.0;
         double sum = 0.0;
         double max = 0.0;
@@ -214,56 +230,27 @@ void set_stats_ping(){
         char* read_line = NULL;
         size_t n = 0;
 
-        /* Read file */
+        /* Read file line by line */
         while(getline(&read_line,&n,fd) != -1){
-
-            /* Check getline error */
             if(read_line == NULL){
                 break;
             }
-
-            /* Check if the ping is flagged as LOSS */
-            if(strcmp(read_line,"LOSS") == 0){
-                nb_loss++;
-            }else{
-                /* Evaluate the ping as a double */
-                ping = strtod(read_line,NULL);
-                /* Test null ping */
-                if(ping < 0.1){
-                    /* Ignore null ping */
-                }else{
-                    /* Number of ping readed (for mean calculation) */
-                    nb_ping++;
-                    /* Max ping */
-                    if(ping > max){
-                        max = ping;
-                    }
-                    /* Min ping */
-                    if(ping < min){
-                        min = ping;
-                    }
-                    /* Number of ping above 100 ms */
-                    if(ping > 100.0){
-                        nb_high++;
-                    }
-                    /* Sum (for mean calculation) */
-                    sum += ping;
-                }
-            }
+            process_ping_line(read_line, &sum, &max, &min, &nb_high, &nb_loss, &nb_ping);
             free(read_line);
+            read_line = NULL;
             n = 0;
         }
 
-        /* Mean calculation */
-        mean = sum / (double) nb_ping;
-        (void) fclose(fd);
+        if(nb_ping > 0){
+            mean = sum / (double) nb_ping;
+        }
+        fclose(fd);
 
         insert_hourly_report(mean,max,min,nb_high,nb_loss,nb_ping);
 
         if(read_line != NULL){
             free(read_line);
         }
-
     }else{
         perror("stats : ");
     }