diff --git a/cmd/converge/prometheus.go b/cmd/converge/prometheus.go
index d5db9d3..e338e09 100644
--- a/cmd/converge/prometheus.go
+++ b/cmd/converge/prometheus.go
@@ -15,8 +15,8 @@ const NAMESPACE = "converge"
 var (
 	// remember previous values of agent guids and clients so that we can increment
 	// the cumulative counters.
-	lastAgents  map[string]*models.Agent  = make(map[string]*models.Agent)
-	lastClients map[string]*models.Client = make(map[string]*models.Client)
+	lastAgents  map[string]models.Agent  = make(map[string]models.Agent)
+	lastClients map[string]models.Client = make(map[string]models.Client)
 
 	cumulativeAgentCount = promauto.NewCounter(prometheus.CounterOpts{
 		Namespace: NAMESPACE,
@@ -130,19 +130,40 @@ func clientLabels(client models.Client) prometheus.Labels {
 }
 
+// agentActive publishes the metrics of an agent present in the current state.
+// When lastAgents holds a different value for the same guid, the stale info
+// series is deleted first. The start time is exported in Unix milliseconds and
+// the duration as the seconds elapsed since agent.StartTime.
 func agentActive(agent models.Agent) {
-	prevAgent := lastAgents[agent.Guid]
-	if prevAgent != nil && *prevAgent != agent {
-		removeAgentMetrics(prevAgent)
+	prevAgent, ok := lastAgents[agent.Guid]
+	if ok && prevAgent != agent {
+		removeAgentInfoMetrics(prevAgent)
 	}
 	agentInfo.With(agentLabels(agent)).Set(1)
+	agentStartTime.
+		With(prometheus.Labels{"agent_guid": agent.Guid}).
+		Set(float64(agent.StartTime.UnixMilli()))
+	agentDuration.
+		With(prometheus.Labels{"agent_guid": agent.Guid}).
+		Set(time.Since(agent.StartTime).Seconds())
 }
 
+// clientActive publishes the metrics of a client present in the current state.
+// When lastClients holds a different value for the same guid, the stale info
+// series is deleted first. The start time is exported in Unix milliseconds and
+// the duration as the seconds elapsed since client.StartTime.
 func clientActive(client models.Client) {
-	prevClient := lastClients[client.Guid]
-	if prevClient != nil && *prevClient != client {
-		removeClientMetrics(prevClient)
+	prevClient, ok := lastClients[client.Guid]
+	if ok && prevClient != client {
+		removeClientInfoMetrics(prevClient)
 	}
 	clientInfo.With(clientLabels(client)).Set(1)
+
+	clientStartTime.
+		With(prometheus.Labels{"client_guid": client.Guid}).
+		Set(float64(client.StartTime.UnixMilli()))
+	clientDuration.
+		With(prometheus.Labels{"client_guid": client.Guid}).
+		Set(time.Since(client.StartTime).Seconds())
 }
 
 func setupPrometheus(mux *http.ServeMux, notifications chan *models.State) {
@@ -203,27 +224,21 @@ func updateMetricsImpl(state *models.State) {
 	// so that is still possible to identify the client or agent even though some values might
 	// become 0.
 
-	agentGuids := make(map[string]*models.Agent)
-	clientGuids := make(map[string]*models.Client)
+	agentGuids := make(map[string]models.Agent)
+	clientGuids := make(map[string]models.Client)
 
 	agentCount.Set(float64(len(state.Agents)))
-	disconnectedAgents := make(map[string]*models.Agent)
+	disconnectedAgents := make(map[string]models.Agent)
 	for k, v := range lastAgents {
 		disconnectedAgents[k] = v
 	}
 	for _, agent := range state.Agents {
-		if lastAgents[agent.Guid] == nil {
+		if _, ok := lastAgents[agent.Guid]; !ok {
 			cumulativeAgentCount.Inc()
 		}
 		delete(disconnectedAgents, agent.Guid)
-		agentGuids[agent.Guid] = &agent
+		agentGuids[agent.Guid] = agent
 		agentActive(agent)
-		agentStartTime.
-			With(prometheus.Labels{"agent_guid": agent.Guid}).
-			Set(float64(agent.StartTime.UnixMilli()))
-		agentDuration.
-			With(prometheus.Labels{"agent_guid": agent.Guid}).
-			Set(float64(time.Now().Sub(agent.StartTime).Seconds()))
 	}
 	for _, agent := range disconnectedAgents {
 		removeAgentMetrics(agent)
@@ -233,23 +248,17 @@ func updateMetricsImpl(state *models.State) {
 	clientCount.Set(float64(len(state.Clients)))
 
 	// with this app
-	disconnectedClients := make(map[string]*models.Client)
+	disconnectedClients := make(map[string]models.Client)
 	for k, v := range lastClients {
 		disconnectedClients[k] = v
 	}
 	for _, client := range state.Clients {
-		if lastClients[client.Guid] == nil {
+		if _, ok := lastClients[client.Guid]; !ok {
 			cumulativeClientCount.Inc()
 		}
 		delete(disconnectedClients, client.Guid)
-		clientGuids[client.Guid] = &client
+		clientGuids[client.Guid] = client
 		clientActive(client)
-		clientStartTime.
-			With(prometheus.Labels{"client_guid": client.Guid}).
-			Set(float64(client.StartTime.UnixMilli()))
-		clientDuration.
-			With(prometheus.Labels{"client_guid": client.Guid}).
-			Set(float64(time.Now().Sub(client.StartTime).Seconds()))
 	}
 	for _, client := range disconnectedClients {
 		removeClientMetrics(client)
@@ -257,8 +266,17 @@ func updateMetricsImpl(state *models.State) {
 	lastClients = clientGuids
 }
 
-func removeAgentMetrics(agent *models.Agent) {
-	ok1 := agentInfo.Delete(agentLabels(*agent))
+// removeAgentInfoMetrics deletes the info series labelled for the given agent
+// and returns the result of that deletion.
+func removeAgentInfoMetrics(agent models.Agent) bool {
+	return agentInfo.Delete(agentLabels(agent))
+}
+
+// removeAgentMetrics deletes the info and start-time series of the given agent
+// and logs the individual results when either deletion fails. The duration
+// series is deleted with a delay, see below.
+func removeAgentMetrics(agent models.Agent) {
+	ok1 := removeAgentInfoMetrics(agent)
 	guidLabels := prometheus.Labels{"agent_guid": agent.Guid}
 	ok2 := agentStartTime.Delete(guidLabels)
 	// delayed deletion of the duration sow we are sure the prometheus has the last data.
@@ -270,12 +288,22 @@ func removeAgentMetrics(agent *models.Agent) {
 		}
 	}()
 	if !ok1 || !ok2 {
-		log.Printf("Could not delete all timeseries for agent %s", agent.Guid)
+		log.Printf("Could not delete all timeseries for agent %s (info %v, starttime %v)",
+			agent.Guid, ok1, ok2)
 	}
 }
 
-func removeClientMetrics(client *models.Client) {
-	ok1 := clientInfo.Delete(clientLabels(*client))
+// removeClientInfoMetrics deletes the info series labelled for the given client
+// and returns the result of that deletion.
+func removeClientInfoMetrics(client models.Client) bool {
+	return clientInfo.Delete(clientLabels(client))
+}
+
+// removeClientMetrics deletes the info and start-time series of the given client
+// and logs the individual results when either deletion fails. The duration
+// series is deleted with a delay, see below.
+func removeClientMetrics(client models.Client) {
+	ok1 := removeClientInfoMetrics(client)
 	guidLabels := prometheus.Labels{"client_guid": client.Guid}
 	ok2 := clientStartTime.Delete(guidLabels)
 	// delayed deletion of the duration sow we are sure the prometheus has the last data.
@@ -287,6 +315,6 @@ func removeClientMetrics(client *models.Client) {
 		}
 	}()
 	if !ok1 || !ok2 {
-		log.Printf("Could not delete all timeseries for client %s", client.Guid)
+		log.Printf("Could not delete all timeseries for client %s (info %v, starttime %v)", client.Guid, ok1, ok2)
 	}
 }