forked from menault/TD2_DEV51_Qualite_Algo
maj
f.py (new file, 241 lines)
@@ -0,0 +1,241 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simulation of the process by which links are created between web pages under the
influence of search engines, following the assignment
"Modélisations mathématiques — BUT3 Info Fontainebleau".

Features:
- Initialisation of a directed graph of n pages with relevance p(x) in [0,1]
- Choice of search score: "indegree_plus_one" (in-degree + 1, normalised) or "pagerank"
- At each iteration: pick x at random, then y according to the score distribution s(y),
  then add the link x->y with probability p(y)
- Policies: self-loops x->x forbidden, optional replacement policy
- Stopping: when every page has at least ceil(ln n) outgoing links OR on stagnation
- Visualisation: final graph (node size = score), history of new links
- CSV and PNG exports
"""

import numpy as np
import random
import math
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from dataclasses import dataclass
from typing import Optional, Tuple, Dict, List


# ----------------------------
# Simulation configuration
# ----------------------------

@dataclass
class WebSimulationConfig:
    n: int = 30
    seed: Optional[int] = 42
    score_mode: str = "indegree_plus_one"  # "indegree_plus_one" | "pagerank"
    use_realistic_p: bool = False
    beta_params: Tuple[float, float] = (2.0, 5.0)
    max_iters: int = 20000
    min_out_links: Optional[int] = None
    stagnation_patience: int = 500
    allow_self_loops: bool = False
    replacement_policy: str = "none"  # "none" | "replace_worst_if_better"
    pagerank_damping: float = 0.85
    pagerank_tol: float = 1.0e-08
    pagerank_max_iter: int = 100


# ----------------------------
# Simulation
# ----------------------------

class WebSimulation:
    def __init__(self, config: WebSimulationConfig):
        self.cfg = config
        if self.cfg.seed is not None:
            np.random.seed(self.cfg.seed)
            random.seed(self.cfg.seed)
        self.G = nx.DiGraph()
        self.G.add_nodes_from(range(self.cfg.n))
        if self.cfg.use_realistic_p:
            a, b = self.cfg.beta_params
            p_vals = np.random.beta(a, b, size=self.cfg.n)
        else:
            p_vals = np.random.rand(self.cfg.n)
        self.p = {i: float(p_vals[i]) for i in range(self.cfg.n)}
        self.scores = {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}
        self.min_out_links = (math.ceil(math.log(self.cfg.n)) if self.cfg.min_out_links is None
                              else self.cfg.min_out_links)

    def _compute_scores_indegree_plus_one(self) -> Dict[int, float]:
        raw = np.array([self.G.in_degree(i) + 1 for i in range(self.cfg.n)], dtype=float)
        total = raw.sum()
        if total <= 0:
            return {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}
        return {i: float(raw[i] / total) for i in range(self.cfg.n)}

    def _compute_scores_pagerank(self) -> Dict[int, float]:
        try:
            pr = nx.pagerank(self.G, alpha=self.cfg.pagerank_damping,
                             tol=self.cfg.pagerank_tol,
                             max_iter=self.cfg.pagerank_max_iter)
        except nx.PowerIterationFailedConvergence:
            pr = {i: 1.0 / self.cfg.n for i in range(self.cfg.n)}
        return pr

    def recompute_scores(self) -> None:
        if self.cfg.score_mode == "indegree_plus_one":
            self.scores = self._compute_scores_indegree_plus_one()
        elif self.cfg.score_mode == "pagerank":
            self.scores = self._compute_scores_pagerank()
        else:
            raise ValueError("Unknown score_mode")

    def _choose_y_weighted_by_scores(self) -> int:
        nodes = list(range(self.cfg.n))
        weights = np.array([self.scores[i] for i in nodes], dtype=float)
        weights = weights / weights.sum()
        return int(np.random.choice(nodes, p=weights))

    def _maybe_add_edge(self, x: int, y: int) -> bool:
        if (not self.cfg.allow_self_loops) and (x == y):
            return False
        accept = (random.random() <= self.p[y])
        if not accept:
            return False

        if self.cfg.replacement_policy == "none":
            if not self.G.has_edge(x, y):
                self.G.add_edge(x, y, weight=1.0)
                return True
            return False

        if self.cfg.replacement_policy == "replace_worst_if_better":
            out_neighbors = list(self.G.successors(x))
            if y in out_neighbors:
                return False
            if len(out_neighbors) < self.min_out_links:
                self.G.add_edge(x, y, weight=1.0)
                return True
            worst = min(out_neighbors, key=lambda t: self.p[t]) if out_neighbors else None
            if worst is None:
                self.G.add_edge(x, y, weight=1.0)
                return True
            if self.p[y] > self.p[worst]:
                self.G.remove_edge(x, worst)
                self.G.add_edge(x, y, weight=1.0)
                return True
            return False

        raise ValueError("Unknown replacement_policy")

    def _meets_stopping_condition(self, stagnation_steps: int) -> bool:
        if all(self.G.out_degree(i) >= self.min_out_links for i in range(self.cfg.n)):
            return True
        if stagnation_steps >= self.cfg.stagnation_patience:
            return True
        return False

    def run(self):
        history_new_edges = []
        stagnation = 0
        iters = 0
        while iters < self.cfg.max_iters:
            x = random.randrange(self.cfg.n)
            y = self._choose_y_weighted_by_scores()
            changed = self._maybe_add_edge(x, y)
            history_new_edges.append(1 if changed else 0)
            if changed:
                stagnation = 0
                self.recompute_scores()
            else:
                stagnation += 1
            iters += 1
            if self._meets_stopping_condition(stagnation):
                break
        self.recompute_scores()
        return self.G, self.p, self.scores, history_new_edges, iters


# ----------------------------
# Analysis & plotting tools
# ----------------------------

def summarize(G, p, scores) -> pd.DataFrame:
    rows = []
    for i in G.nodes():
        rows.append({
            "node": i,
            "pertinence_p": p[i],
            "score": scores[i],
            "in_degree": G.in_degree(i),
            "out_degree": G.out_degree(i),
        })
    return pd.DataFrame(rows).sort_values(["score", "pertinence_p"], ascending=[False, False]).reset_index(drop=True)

def plot_expected_graph(G, scores, title="Graphe attendu (taille = score)", save_path=None):
    # Strict circular layout
    pos = nx.circular_layout(G)

    # Node sizes proportional to the scores
    score_vals = np.array([scores[i] for i in G.nodes()])
    node_sizes = 3000 * (score_vals / score_vals.max() + 0.05)

    plt.figure(figsize=(7, 7))
    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color="dodgerblue")
    nx.draw_networkx_edges(
        G, pos, arrows=True, arrowstyle="-|>", arrowsize=8,
        edge_color="black", width=0.8
    )
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    if save_path is not None:
        # Save before show(): show() may close the figure depending on the backend
        plt.savefig(save_path, dpi=150, bbox_inches="tight")
    plt.show()


def plot_history(history, save_path=None):
    plt.figure(figsize=(10, 3))
    plt.plot(history)
    plt.title("Nouveaux liens (1) vs itérations")
    plt.xlabel("Itération")
    plt.ylabel("Nouveau lien ?")
    plt.tight_layout()
    if save_path is not None:
        plt.savefig(save_path, dpi=150, bbox_inches="tight")
    plt.show()


# ----------------------------
# Main
# ----------------------------

def main():
    cfg = WebSimulationConfig(
        n=30,
        seed=123,
        score_mode="indegree_plus_one",  # or "pagerank"
        use_realistic_p=True,
        beta_params=(2.5, 4.0),
        max_iters=10000,
        min_out_links=None,  # -> ceil(ln n)
        stagnation_patience=1000,
        allow_self_loops=False,
        replacement_policy="replace_worst_if_better",
    )
    sim = WebSimulation(cfg)
    G, p, scores, history, iters = sim.run()
    print(f"Itérations effectuées: {iters}")
    df = summarize(G, p, scores)
    print(df.head(10))
    # Figures
    plot_expected_graph(G, scores)
    plot_history(history)
    # Exports: the plotting helpers save the PNG themselves, before plt.show()
    df.to_csv("resume_pages.csv", index=False)
    plot_expected_graph(G, scores, title="Graphe final exporté", save_path="graphe_final.png")
    plot_history(history, save_path="historique_liens.png")


if __name__ == "__main__":
    main()
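
The committed main() above drives the simulation with the in-degree score. A minimal sketch of running the same module with the PageRank score instead (assuming the file is importable as the module f; the parameter values here are illustrative):

    from f import WebSimulation, WebSimulationConfig, summarize

    cfg = WebSimulationConfig(n=20, seed=7, score_mode="pagerank",
                              max_iters=5000, stagnation_patience=500)
    G, p, scores, history, iters = WebSimulation(cfg).run()

    df = summarize(G, p, scores)
    print(iters, "iterations")
    print(df.head(5))                            # pages ranked by PageRank score
    print(df["pertinence_p"].corr(df["score"]))  # do relevant pages end up well ranked?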
@@ -22,130 +22,106 @@
 Return value :
     Ping value as a string or NULL if an error occured
 */
-void whil(FILE* fd,
-          char* read_line,
-          size_t n ,
-          size_t nmatch,
-          regex_t *p_reg,
-          regmatch_t* pmatch,
-          char* ping,
-          int start,
-          int end,
-          size_t size_ping){
-
-    while(getline(&read_line,&n,fd) != -1){
-
-        if(read_line == NULL){
-            break;
-        }
-
-        /* Exec regex to find ping */
-        if(regexec(p_reg,read_line,nmatch,pmatch,0) == 0){
-
-            /* Extract ping position from read line */
-            start = (int) pmatch[1].rm_so;
-            end = (int) pmatch[1].rm_eo;
-            size_ping = (size_t) (end - start);
-
-            /* ping string memory allocation */
-            ping = malloc(sizeof(char) * (size_ping+2));
-            if(ping == NULL){
-                free(read_line);
-                read_line = NULL;
-                n = 0;
-                break;
-            }
-
-            /* Create ping string */
-            (void) strncpy(ping, &read_line[start], size_ping);
-            ping[size_ping]='\n';
-            ping[size_ping+1]='\0';
-
-            /* Free memory */
-            free(read_line);
-            read_line = NULL;
-            n = 0;
-            break;
-        }
-
-        free(read_line);
-        read_line = NULL;
-        n = 0;
-    }
-}
-
-/*@null@*/char* get_ping_from_temp_log(){
-
-    /* Variables */
+static char* extract_ping_from_line(
+    const char* line,
+    regex_t* p_reg,
+    regmatch_t* pmatch,
+    size_t nmatch
+){
+    int start, end;
+    size_t size_ping;
+    char* ping = NULL;
+
+    if(regexec(p_reg, line, nmatch, pmatch, 0) != 0){
+        return NULL; /* no match */
+    }
+
+    /* Extract ping position from line */
+    start = (int) pmatch[1].rm_so;
+    end = (int) pmatch[1].rm_eo;
+    size_ping = (size_t)(end - start);
+
+    /* Allocate ping string */
+    ping = malloc(sizeof(char) * (size_ping + 2));
+    if(ping == NULL){
+        return NULL;
+    }
+
+    /* Copy substring */
+    (void) strncpy(ping, &line[start], size_ping);
+    ping[size_ping] = '\n';
+    ping[size_ping+1] = '\0';
+
+    return ping;
+}
+
+
+char* get_ping_from_temp_log(){
+
     FILE* fd = NULL;
     char* read_line = NULL;
     size_t n = 0;
     size_t nmatch = 2;
-    regex_t *p_reg;
-    regmatch_t* pmatch;
+    regex_t* p_reg = NULL;
+    regmatch_t* pmatch = NULL;
     char* ping = NULL;
-    int start;
-    int end;
-    size_t size_ping;
 
-    /* regex struct memory allocation */
-    p_reg = (regex_t *) malloc(sizeof(*p_reg));
+    /* regex struct allocation */
+    p_reg = malloc(sizeof(*p_reg));
     if(p_reg == NULL){
-        return ping; /* NULL */
+        return NULL;
     }
 
     /* Open ping log file */
     fd = fopen("/var/log/ping-report/last-ping.log","r");
     if(fd == NULL){
         free(p_reg);
-        return ping; /* NULL */
+        return NULL;
     }
 
     /* Construct regex to get ping from log file */
-    if(regcomp(p_reg,"time=(.*) ms",REG_EXTENDED) != 0){
-        if(p_reg != NULL){
-            free(p_reg);
-        }
-        (void) fclose(fd);
-        return ping; /* NULL */
+    if(regcomp(p_reg, "time=(.*) ms", REG_EXTENDED) != 0){
+        free(p_reg);
+        fclose(fd);
+        return NULL;
     }
 
     /* match info memory allocation */
     pmatch = malloc(sizeof(*pmatch) * nmatch);
     if(pmatch == NULL){
-        (void) fclose(fd);
         regfree(p_reg);
         free(p_reg);
-        return ping; /* NULL */
+        fclose(fd);
+        return NULL;
     }
 
     /* Read file */
-    whil(FILE* fd,
-         char* read_line,
-         size_t n ,
-         size_t nmatch,
-         regex_t *p_reg,
-         regmatch_t* pmatch,
-         char* ping,
-         int start,
-         int end,
-         size_t size_ping);
+    while(getline(&read_line, &n, fd) != -1){
+        if(read_line == NULL){
+            break;
+        }
+
+        ping = extract_ping_from_line(read_line, p_reg, pmatch, nmatch);
+
+        free(read_line);
+        read_line = NULL;
+        n = 0;
+
+        if(ping != NULL){
+            break; /* stop when ping found */
+        }
+    }
 
-    /* free allocated memory */
+    /* Cleanup */
     regfree(p_reg);
     free(p_reg);
     free(pmatch);
     if(read_line != NULL){
         free(read_line);
     }
-    (void) fclose(fd);
+    fclose(fd);
 
-    /* ping may be null, then it must mean that the ping request was lost */
-    return ping;
+    return ping; /* May be NULL if not found */
 }
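
The refactored function above delegates the regex handling to extract_ping_from_line, which returns the capture group of "time=(.*) ms" followed by a trailing newline, or NULL when the line contains no ping. A minimal Python sketch of that behaviour (not part of the commit; the sample lines are illustrative):

    import re
    from typing import Optional

    def extract_ping_from_line(line: str) -> Optional[str]:
        # Mirror of the C helper: captured value plus a trailing newline, or None when no match
        m = re.search(r"time=(.*) ms", line)
        if m is None:
            return None
        return m.group(1) + "\n"

    print(extract_ping_from_line("64 bytes from 8.8.8.8: icmp_seq=1 ttl=117 time=23.4 ms"))  # 23.4
    print(extract_ping_from_line("Request timeout for icmp_seq 2"))                          # None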
 
 
 /*
 -- write_ping_log --
 Desc :
@@ -194,15 +170,48 @@ void write_ping_log(char* new_ping){
 Return value :
     None
 */
+
+static void process_ping_line(
+    const char* line,
+    double* sum,
+    double* max,
+    double* min,
+    int* nb_high,
+    int* nb_loss,
+    int* nb_ping
+){
+    double ping;
+
+    if(strcmp(line,"LOSS") == 0){
+        (*nb_loss)++;
+        return;
+    }
+
+    /* Evaluate the ping as a double */
+    ping = strtod(line,NULL);
+    if(ping < 0.1){
+        return; /* Ignore null ping */
+    }
+
+    (*nb_ping)++;
+
+    if(ping > *max){
+        *max = ping;
+    }
+    if(ping < *min){
+        *min = ping;
+    }
+    if(ping > 100.0){
+        (*nb_high)++;
+    }
+    *sum += ping;
+}
+
 void set_stats_ping(){
 
-    /* Variables */
-    FILE* fd;
     /* Open log file */
-    fd = fopen("/var/log/ping-report/all-ping.log","r");
+    FILE* fd = fopen("/var/log/ping-report/all-ping.log","r");
 
     if(fd != NULL){
         /* Stats variables */
         double ping = 0.0;
         double sum = 0.0;
         double max = 0.0;
@@ -214,56 +223,27 @@ void set_stats_ping(){
         char* read_line = NULL;
         size_t n = 0;
 
-        /* Read file */
+        /* Read file line by line */
         while(getline(&read_line,&n,fd) != -1){
 
             /* Check getline error */
             if(read_line == NULL){
                 break;
             }
 
-            /* Check if the ping is flagged as LOSS */
-            if(strcmp(read_line,"LOSS") == 0){
-                nb_loss++;
-            }else{
-                /* Evaluate the ping as a double */
-                ping = strtod(read_line,NULL);
-                /* Test null ping */
-                if(ping < 0.1){
-                    /* Ignore null ping */
-                }else{
-                    /* Number of ping readed (for mean calculation) */
-                    nb_ping++;
-                    /* Max ping */
-                    if(ping > max){
-                        max = ping;
-                    }
-                    /* Min ping */
-                    if(ping < min){
-                        min = ping;
-                    }
-                    /* Number of ping above 100 ms */
-                    if(ping > 100.0){
-                        nb_high++;
-                    }
-                    /* Sum (for mean calculation) */
-                    sum += ping;
-                }
-            }
+            process_ping_line(read_line, &sum, &max, &min, &nb_high, &nb_loss, &nb_ping);
             free(read_line);
             read_line = NULL;
             n = 0;
         }
 
         /* Mean calculation */
-        mean = sum / (double) nb_ping;
-        (void) fclose(fd);
+        if(nb_ping > 0){
+            mean = sum / (double) nb_ping;
+        }
+        fclose(fd);
 
         insert_hourly_report(mean,max,min,nb_high,nb_loss,nb_ping);
 
         if(read_line != NULL){
             free(read_line);
         }
 
     }else{
         perror("stats : ");
     }
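
For reference, the per-line statistics that process_ping_line maintains (loss count, min/max, number of pings above 100 ms, running sum for the mean) can be sketched in Python as follows. The input lines are illustrative, and the initial value of minimum is an assumption, since the hunk does not show how min is initialised:

    def aggregate(lines):
        # Mirrors the counters updated by process_ping_line / set_stats_ping
        total = 0.0
        maximum = 0.0
        minimum = float("inf")    # assumed starting value
        nb_high = nb_loss = nb_ping = 0
        for line in lines:
            if line.strip() == "LOSS":
                nb_loss += 1
                continue
            ping = float(line)
            if ping < 0.1:        # ignore null pings, as in the C code
                continue
            nb_ping += 1
            maximum = max(maximum, ping)
            minimum = min(minimum, ping)
            if ping > 100.0:
                nb_high += 1
            total += ping
        mean = total / nb_ping if nb_ping else 0.0
        return mean, maximum, minimum, nb_high, nb_loss, nb_ping

    print(aggregate(["12.4\n", "LOSS\n", "153.0\n"]))
    # mean ~ 82.7, max 153.0, min 12.4, 1 high ping, 1 loss, 2 pings counted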