This commit is contained in:
2025-09-16 11:09:06 +02:00
parent db41c94d58
commit 5e73e03486
2 changed files with 350 additions and 129 deletions

241
f.py Normal file
View File

@@ -0,0 +1,241 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simulation du processus de création de liens entre pages web sous l'influence de moteurs de recherche,
conformément à l'énoncé "Modélisations mathématiques — BUT3 Info Fontainebleau".
Fonctionnalités :
- Initialisation d'un graphe orienté de n pages avec pertinence p(x) dans [0,1]
- Scores de recherche au choix : "indegree_plus_one" (degré entrant + 1, normalisé) ou "pagerank"
- À chaque itération : on choisit x au hasard, puis y selon la distribution des scores s(y),
puis on ajoute le lien x->y avec probabilité p(y)
- Politiques : interdiction des boucles x->x, politique de remplacement optionnelle
- Arrêt : quand chaque page a au moins ceil(ln n) liens sortants OU stagnation
- Visualisation : graphe final (taille des nœuds = score), historique des nouveaux liens
- Export CSV et PNG
"""
import numpy as np
import random
import math
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from dataclasses import dataclass
from typing import Optional, Tuple, Dict, List
# ----------------------------
# Configuration de simulation
# ----------------------------
@dataclass
class WebSimulationConfig:
    """Parameters of the web link-creation simulation.

    Raises:
        ValueError: if ``n`` is smaller than 1, or if ``score_mode`` or
            ``replacement_policy`` is not one of the supported values.
    """

    # Number of pages (graph nodes are 0 .. n-1).
    n: int = 30
    # Seed applied to both the `random` and `numpy.random` global generators;
    # None leaves them unseeded.
    seed: Optional[int] = 42
    # Search-score definition: "indegree_plus_one" | "pagerank".
    score_mode: str = "indegree_plus_one"
    # True: draw relevances p(x) from Beta(*beta_params); False: uniform [0, 1).
    use_realistic_p: bool = False
    beta_params: Tuple[float, float] = (2.0, 5.0)
    # Hard upper bound on the number of simulation iterations.
    max_iters: int = 20000
    # Required out-degree per page for the stop test; None means ceil(ln n).
    min_out_links: Optional[int] = None
    # Consecutive iterations without any graph change before stopping.
    stagnation_patience: int = 500
    # Whether a link x -> x may be created.
    allow_self_loops: bool = False
    # Link replacement rule: "none" | "replace_worst_if_better".
    replacement_policy: str = "none"
    # Forwarded to networkx.pagerank as alpha, tol and max_iter.
    pagerank_damping: float = 0.85
    pagerank_tol: float = 1.0e-08
    pagerank_max_iter: int = 100

    def __post_init__(self) -> None:
        # Fail fast: otherwise an unknown mode/policy is only detected in the
        # middle of a run (and only once a link happens to be accepted).
        if self.n < 1:
            raise ValueError(f"n must be >= 1, got {self.n}")
        if self.score_mode not in ("indegree_plus_one", "pagerank"):
            raise ValueError(f"Unknown score_mode: {self.score_mode!r}")
        if self.replacement_policy not in ("none", "replace_worst_if_better"):
            raise ValueError(
                f"Unknown replacement_policy: {self.replacement_policy!r}"
            )
# ----------------------------
# Simulation
# ----------------------------
class WebSimulation:
    """Simulate link creation between n web pages driven by search scores.

    Each iteration picks a source page x uniformly at random, picks a target
    page y according to the current search scores, and creates the link
    x -> y with probability p(y) (the relevance of y).

    Note: when ``config.seed`` is not None the constructor seeds the *global*
    ``random`` and ``numpy.random`` generators, which the simulation then uses.
    """

    def __init__(self, config: "WebSimulationConfig"):
        """Build the empty graph, draw the relevances and set uniform scores.

        Raises:
            ValueError: if ``config.n`` is smaller than 1.
        """
        self.cfg = config
        if self.cfg.n < 1:
            # Without this check math.log() fails with an opaque domain error.
            raise ValueError(f"n must be >= 1, got {self.cfg.n}")
        if self.cfg.seed is not None:
            np.random.seed(self.cfg.seed)
            random.seed(self.cfg.seed)
        self.G = nx.DiGraph()
        self.G.add_nodes_from(range(self.cfg.n))
        if self.cfg.use_realistic_p:
            a, b = self.cfg.beta_params
            p_vals = np.random.beta(a, b, size=self.cfg.n)
        else:
            p_vals = np.random.rand(self.cfg.n)
        # Relevance p(x) of every page.
        self.p = {i: float(v) for i, v in enumerate(p_vals)}
        self.scores = self._uniform_scores()
        # Out-degree every page must reach for the run to stop early.
        if self.cfg.min_out_links is None:
            self.min_out_links = math.ceil(math.log(self.cfg.n))
        else:
            self.min_out_links = self.cfg.min_out_links

    def _uniform_scores(self) -> Dict[int, float]:
        """Return the score dict giving every page the same weight 1/n."""
        return {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}

    def _compute_scores_indegree_plus_one(self) -> Dict[int, float]:
        """Return scores proportional to (in-degree + 1), normalised to sum to 1."""
        raw = np.array(
            [self.G.in_degree(i) + 1 for i in range(self.cfg.n)], dtype=float
        )
        total = raw.sum()
        if total <= 0:
            return self._uniform_scores()
        return {i: float(raw[i] / total) for i in range(self.cfg.n)}

    def _compute_scores_pagerank(self) -> Dict[int, float]:
        """Return PageRank scores, or uniform scores if the iteration fails to converge."""
        try:
            pr = nx.pagerank(
                self.G,
                alpha=self.cfg.pagerank_damping,
                tol=self.cfg.pagerank_tol,
                max_iter=self.cfg.pagerank_max_iter,
            )
        except nx.PowerIterationFailedConvergence:
            return self._uniform_scores()
        return pr

    def recompute_scores(self) -> None:
        """Refresh ``self.scores`` according to ``cfg.score_mode``.

        Raises:
            ValueError: if ``cfg.score_mode`` is not a supported mode.
        """
        if self.cfg.score_mode == "indegree_plus_one":
            self.scores = self._compute_scores_indegree_plus_one()
        elif self.cfg.score_mode == "pagerank":
            self.scores = self._compute_scores_pagerank()
        else:
            raise ValueError("Unknown score_mode")

    def _choose_y_weighted_by_scores(self) -> int:
        """Draw a target page with probability proportional to its score."""
        n = self.cfg.n
        weights = np.array([self.scores[i] for i in range(n)], dtype=float)
        total = weights.sum()
        if total > 0:
            weights = weights / total
        else:
            # A zero sum would yield NaN probabilities; fall back to uniform.
            weights = np.full(n, 1.0 / n)
        return int(np.random.choice(n, p=weights))

    def _maybe_add_edge(self, x: int, y: int) -> bool:
        """Try to create the link x -> y; return True if the graph changed.

        The link is accepted with probability p(y), then the replacement
        policy decides whether it is added, swapped in, or dropped.

        Raises:
            ValueError: if an accepted link meets an unknown replacement policy.
        """
        if (not self.cfg.allow_self_loops) and (x == y):
            return False
        # Strict '<': random() lies in [0, 1), so this accepts with probability
        # exactly p(y) and never accepts a page whose relevance is 0.
        if not (random.random() < self.p[y]):
            return False
        policy = self.cfg.replacement_policy
        if policy == "none":
            if self.G.has_edge(x, y):
                return False
            self.G.add_edge(x, y, weight=1.0)
            return True
        if policy == "replace_worst_if_better":
            out_neighbors = list(self.G.successors(x))
            if y in out_neighbors:
                return False
            # Still below quota (or nothing to replace): simply add the link.
            if len(out_neighbors) < self.min_out_links or not out_neighbors:
                self.G.add_edge(x, y, weight=1.0)
                return True
            # Quota reached: y only displaces the least relevant current target.
            worst = min(out_neighbors, key=lambda t: self.p[t])
            if self.p[y] > self.p[worst]:
                self.G.remove_edge(x, worst)
                self.G.add_edge(x, y, weight=1.0)
                return True
            return False
        raise ValueError("Unknown replacement_policy")

    def _meets_stopping_condition(self, stagnation_steps: int) -> bool:
        """Return True when every page has enough out-links or the run stagnates."""
        if all(
            self.G.out_degree(i) >= self.min_out_links for i in range(self.cfg.n)
        ):
            return True
        return stagnation_steps >= self.cfg.stagnation_patience

    def run(self) -> Tuple["nx.DiGraph", Dict[int, float], Dict[int, float], List[int], int]:
        """Run the simulation until a stop condition or ``cfg.max_iters``.

        Returns:
            ``(G, p, scores, history, iters)`` where ``history[k]`` is 1 if
            iteration k changed the graph (0 otherwise) and ``iters`` is the
            number of iterations performed.
        """
        history_new_edges: List[int] = []
        stagnation = 0
        iters = 0
        while iters < self.cfg.max_iters:
            x = random.randrange(self.cfg.n)
            y = self._choose_y_weighted_by_scores()
            changed = self._maybe_add_edge(x, y)
            history_new_edges.append(1 if changed else 0)
            if changed:
                stagnation = 0
                # Scores depend on the graph, so refresh them after each change.
                self.recompute_scores()
            else:
                stagnation += 1
            iters += 1
            if self._meets_stopping_condition(stagnation):
                break
        # Make sure the returned scores reflect the final graph and score mode.
        self.recompute_scores()
        return self.G, self.p, self.scores, history_new_edges, iters
# ----------------------------
# Outils d'analyse & tracés
# ----------------------------
def summarize(G, p, scores) -> pd.DataFrame:
    """Build a per-page summary table sorted by score, then by relevance.

    Args:
        G: graph exposing ``nodes()``, ``in_degree(i)`` and ``out_degree(i)``.
        p: mapping node -> relevance p(x).
        scores: mapping node -> search score.

    Returns:
        A DataFrame with columns ``node``, ``pertinence_p``, ``score``,
        ``in_degree`` and ``out_degree``, sorted by descending score and then
        descending relevance. An empty graph yields an empty frame that still
        has these columns.
    """
    columns = ["node", "pertinence_p", "score", "in_degree", "out_degree"]
    rows = [
        {
            "node": i,
            "pertinence_p": p[i],
            "score": scores[i],
            "in_degree": G.in_degree(i),
            "out_degree": G.out_degree(i),
        }
        for i in G.nodes()
    ]
    # Passing the columns explicitly keeps the sort keys present even when
    # there are no rows; otherwise sort_values raises KeyError.
    table = pd.DataFrame(rows, columns=columns)
    ordered = table.sort_values(["score", "pertinence_p"], ascending=[False, False])
    return ordered.reset_index(drop=True)
def _finalize_figure(fig, save_path, show) -> None:
    """Save ``fig`` if requested, then either show it or close it.

    Saving happens *before* ``plt.show()``: once a blocking show returns, the
    figure may already be gone and a later savefig would write a blank image.
    """
    if save_path is not None:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
    if show:
        plt.show()
    else:
        # Nothing will display the figure, so release it right away.
        plt.close(fig)


def plot_expected_graph(G, scores, title="Graphe attendu (taille = score)",
                        save_path: Optional[str] = None, show: bool = True):
    """Draw the graph on a circle with node sizes proportional to the scores.

    Args:
        G: directed graph to draw.
        scores: mapping node -> score used for the node sizes.
        title: figure title.
        save_path: if given, the figure is written to this path (150 dpi).
        show: if True (default) call ``plt.show()``; otherwise close the figure.
    """
    # Strict circular layout.
    pos = nx.circular_layout(G)
    # Node size proportional to the score relative to the best-scored node.
    score_vals = np.array([scores[i] for i in G.nodes()], dtype=float)
    peak = score_vals.max() if score_vals.size else 0.0
    if peak > 0:
        relative = score_vals / peak
    else:
        # Empty graph or all-zero scores: avoid max() on empty / divide by 0.
        relative = np.zeros_like(score_vals)
    node_sizes = 3000 * (relative + 0.05)
    fig = plt.figure(figsize=(7, 7))
    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color="dodgerblue")
    nx.draw_networkx_edges(
        G, pos, arrows=True, arrowstyle="-|>", arrowsize=8,
        edge_color="black", width=0.8
    )
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    _finalize_figure(fig, save_path, show)


def plot_history(history, save_path: Optional[str] = None, show: bool = True):
    """Plot, per iteration, whether the graph changed (1) or not (0).

    Args:
        history: sequence of 0/1 flags, one per iteration.
        save_path: if given, the figure is written to this path (150 dpi).
        show: if True (default) call ``plt.show()``; otherwise close the figure.
    """
    fig = plt.figure(figsize=(10, 3))
    plt.plot(history)
    plt.title("Nouveaux liens (1) vs itérations")
    plt.xlabel("Itération")
    plt.ylabel("Nouveau lien ?")
    plt.tight_layout()
    _finalize_figure(fig, save_path, show)


# ----------------------------
# Main
# ----------------------------
def main():
    """Run one simulation, print a summary, show the figures and export CSV/PNG."""
    cfg = WebSimulationConfig(
        n=30,
        seed=123,
        score_mode="indegree_plus_one",  # or "pagerank"
        use_realistic_p=True,
        beta_params=(2.5, 4.0),
        max_iters=10000,
        min_out_links=None,  # -> ceil(ln n)
        stagnation_patience=1000,
        allow_self_loops=False,
        replacement_policy="replace_worst_if_better",
    )
    sim = WebSimulation(cfg)
    G, p, scores, history, iters = sim.run()
    print(f"Itérations effectuées: {iters}")
    df = summarize(G, p, scores)
    print(df.head(10))
    # Figures
    plot_expected_graph(G, scores)
    plot_history(history)
    # Exports: the plot helpers save their own figure before it is closed, so
    # the PNG files contain the drawing and no stray empty figure is created.
    df.to_csv("resume_pages.csv", index=False)
    plot_expected_graph(G, scores, title="Graphe final exporté",
                        save_path="graphe_final.png", show=False)
    plot_history(history, save_path="historique_liens.png", show=False)


if __name__ == "__main__":
    main()

View File

@@ -22,130 +22,106 @@
Return value :
Ping value as a string or NULL if an error occurred
*/
void whil(FILE* fd,
char* read_line,
size_t n ,
size_t nmatch,
static char* extract_ping_from_line(
const char* line,
regex_t* p_reg,
regmatch_t* pmatch,
char* ping,
int start,
int end,
size_t size_ping){
size_t nmatch
){
int start, end;
size_t size_ping;
char* ping = NULL;
while(getline(&read_line,&n,fd) != -1){
if(read_line == NULL){
break;
if(regexec(p_reg, line, nmatch, pmatch, 0) != 0){
return NULL; /* no match */
}
/* Exec regex to find ping */
if(regexec(p_reg,read_line,nmatch,pmatch,0) == 0){
/* Extract ping position from read line */
/* Extract ping position from line */
start = (int) pmatch[1].rm_so;
end = (int) pmatch[1].rm_eo;
size_ping = (size_t)(end - start);
/* ping string memory allocation */
/* Allocate ping string */
ping = malloc(sizeof(char) * (size_ping + 2));
if(ping == NULL){
free(read_line);
read_line = NULL;
n = 0;
break;
return NULL;
}
/* Create ping string */
(void) strncpy(ping, &read_line[start], size_ping);
/* Copy substring */
(void) strncpy(ping, &line[start], size_ping);
ping[size_ping] = '\n';
ping[size_ping+1] = '\0';
/* Free memory */
free(read_line);
read_line = NULL;
n = 0;
break;
return ping;
}
free(read_line);
read_line = NULL;
n = 0;
}
}
/*@null@*/char* get_ping_from_temp_log(){
/* Variables */
char* get_ping_from_temp_log(){
FILE* fd = NULL;
char* read_line = NULL;
size_t n = 0;
size_t nmatch = 2;
regex_t *p_reg;
regmatch_t* pmatch;
regex_t* p_reg = NULL;
regmatch_t* pmatch = NULL;
char* ping = NULL;
int start;
int end;
size_t size_ping;
/* regex struct memory allocation */
p_reg = (regex_t *) malloc(sizeof(*p_reg));
/* regex struct allocation */
p_reg = malloc(sizeof(*p_reg));
if(p_reg == NULL){
return ping; /* NULL */
return NULL;
}
/* Open ping log file */
fd = fopen("/var/log/ping-report/last-ping.log","r");
if(fd == NULL){
free(p_reg);
return ping; /* NULL */
return NULL;
}
/* Construct regex to get ping from log file */
if(regcomp(p_reg, "time=(.*) ms", REG_EXTENDED) != 0){
if(p_reg != NULL){
free(p_reg);
}
(void) fclose(fd);
return ping; /* NULL */
fclose(fd);
return NULL;
}
/* match info memory allocation */
pmatch = malloc(sizeof(*pmatch) * nmatch);
if(pmatch == NULL){
(void) fclose(fd);
regfree(p_reg);
free(p_reg);
return ping; /* NULL */
fclose(fd);
return NULL;
}
/* Read file */
whil(FILE* fd,
char* read_line,
size_t n ,
size_t nmatch,
regex_t *p_reg,
regmatch_t* pmatch,
char* ping,
int start,
int end,
size_t size_ping);
while(getline(&read_line, &n, fd) != -1){
if(read_line == NULL){
break;
}
/* free allocated memory */
ping = extract_ping_from_line(read_line, p_reg, pmatch, nmatch);
free(read_line);
read_line = NULL;
n = 0;
if(ping != NULL){
break; /* stop when ping found */
}
}
/* Cleanup */
regfree(p_reg);
free(p_reg);
free(pmatch);
if(read_line != NULL){
free(read_line);
}
fclose(fd);
(void) fclose(fd);
/* ping may be null, then it must mean that the ping request was lost */
return ping;
return ping; /* May be NULL if not found */
}
/*
-- write_ping_log --
Desc :
@@ -194,15 +170,48 @@ void write_ping_log(char* new_ping){
Return value :
None
*/
static void process_ping_line(
const char* line,
double* sum,
double* max,
double* min,
int* nb_high,
int* nb_loss,
int* nb_ping
){
double ping;
if(strcmp(line,"LOSS") == 0){
(*nb_loss)++;
return;
}
/* Evaluate the ping as a double */
ping = strtod(line,NULL);
if(ping < 0.1){
return; /* Ignore null ping */
}
(*nb_ping)++;
if(ping > *max){
*max = ping;
}
if(ping < *min){
*min = ping;
}
if(ping > 100.0){
(*nb_high)++;
}
*sum += ping;
}
void set_stats_ping(){
/* Variables */
FILE* fd;
/* Open log file */
fd = fopen("/var/log/ping-report/all-ping.log","r");
FILE* fd = fopen("/var/log/ping-report/all-ping.log","r");
if(fd != NULL){
/* Stats variables */
double ping = 0.0;
double sum = 0.0;
double max = 0.0;
@@ -214,56 +223,27 @@ void set_stats_ping(){
char* read_line = NULL;
size_t n = 0;
/* Read file */
/* Read file line by line */
while(getline(&read_line,&n,fd) != -1){
/* Check getline error */
if(read_line == NULL){
break;
}
/* Check if the ping is flagged as LOSS */
if(strcmp(read_line,"LOSS") == 0){
nb_loss++;
}else{
/* Evaluate the ping as a double */
ping = strtod(read_line,NULL);
/* Test null ping */
if(ping < 0.1){
/* Ignore null ping */
}else{
/* Number of ping readed (for mean calculation) */
nb_ping++;
/* Max ping */
if(ping > max){
max = ping;
}
/* Min ping */
if(ping < min){
min = ping;
}
/* Number of ping above 100 ms */
if(ping > 100.0){
nb_high++;
}
/* Sum (for mean calculation) */
sum += ping;
}
}
process_ping_line(read_line, &sum, &max, &min, &nb_high, &nb_loss, &nb_ping);
free(read_line);
read_line = NULL;
n = 0;
}
/* Mean calculation */
if(nb_ping > 0){
mean = sum / (double) nb_ping;
(void) fclose(fd);
}
fclose(fd);
insert_hourly_report(mean,max,min,nb_high,nb_loss,nb_ping);
if(read_line != NULL){
free(read_line);
}
}else{
perror("stats : ");
}