Fixed subtle errors in the prometheus code that were caused by the use
of pointers. Now using simple value objects.
This commit is contained in:
parent
813d398ea4
commit
f823d4b67b
@ -15,8 +15,8 @@ const NAMESPACE = "converge"
|
||||
var (
|
||||
// remember previous values of agent guids and clients so that we can increment
|
||||
// the cumulative counters.
|
||||
lastAgents map[string]*models.Agent = make(map[string]*models.Agent)
|
||||
lastClients map[string]*models.Client = make(map[string]*models.Client)
|
||||
lastAgents map[string]models.Agent = make(map[string]models.Agent)
|
||||
lastClients map[string]models.Client = make(map[string]models.Client)
|
||||
|
||||
cumulativeAgentCount = promauto.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: NAMESPACE,
|
||||
@ -130,19 +130,32 @@ func clientLabels(client models.Client) prometheus.Labels {
|
||||
}
|
||||
|
||||
func agentActive(agent models.Agent) {
|
||||
prevAgent := lastAgents[agent.Guid]
|
||||
if prevAgent != nil && *prevAgent != agent {
|
||||
removeAgentMetrics(prevAgent)
|
||||
prevAgent, ok := lastAgents[agent.Guid]
|
||||
if ok && prevAgent != agent {
|
||||
removeAgentInfoMetrics(prevAgent)
|
||||
}
|
||||
agentInfo.With(agentLabels(agent)).Set(1)
|
||||
agentStartTime.
|
||||
With(prometheus.Labels{"agent_guid": agent.Guid}).
|
||||
Set(float64(agent.StartTime.UnixMilli()))
|
||||
agentDuration.
|
||||
With(prometheus.Labels{"agent_guid": agent.Guid}).
|
||||
Set(float64(time.Now().Sub(agent.StartTime).Seconds()))
|
||||
}
|
||||
|
||||
func clientActive(client models.Client) {
|
||||
prevClient := lastClients[client.Guid]
|
||||
if prevClient != nil && *prevClient != client {
|
||||
removeClientMetrics(prevClient)
|
||||
prevClient, ok := lastClients[client.Guid]
|
||||
if ok && prevClient != client {
|
||||
removeClientInfoMetrics(prevClient)
|
||||
}
|
||||
clientInfo.With(clientLabels(client)).Set(1)
|
||||
|
||||
clientStartTime.
|
||||
With(prometheus.Labels{"client_guid": client.Guid}).
|
||||
Set(float64(client.StartTime.UnixMilli()))
|
||||
clientDuration.
|
||||
With(prometheus.Labels{"client_guid": client.Guid}).
|
||||
Set(float64(time.Now().Sub(client.StartTime).Seconds()))
|
||||
}
|
||||
|
||||
func setupPrometheus(mux *http.ServeMux, notifications chan *models.State) {
|
||||
@ -203,27 +216,21 @@ func updateMetricsImpl(state *models.State) {
|
||||
// so that it is still possible to identify the client or agent even though some values might
|
||||
// become 0.
|
||||
|
||||
agentGuids := make(map[string]*models.Agent)
|
||||
clientGuids := make(map[string]*models.Client)
|
||||
agentGuids := make(map[string]models.Agent)
|
||||
clientGuids := make(map[string]models.Client)
|
||||
|
||||
agentCount.Set(float64(len(state.Agents)))
|
||||
disconnectedAgents := make(map[string]*models.Agent)
|
||||
disconnectedAgents := make(map[string]models.Agent)
|
||||
for k, v := range lastAgents {
|
||||
disconnectedAgents[k] = v
|
||||
}
|
||||
for _, agent := range state.Agents {
|
||||
if lastAgents[agent.Guid] == nil {
|
||||
if _, ok := lastAgents[agent.Guid]; !ok {
|
||||
cumulativeAgentCount.Inc()
|
||||
}
|
||||
delete(disconnectedAgents, agent.Guid)
|
||||
agentGuids[agent.Guid] = &agent
|
||||
agentGuids[agent.Guid] = agent
|
||||
agentActive(agent)
|
||||
agentStartTime.
|
||||
With(prometheus.Labels{"agent_guid": agent.Guid}).
|
||||
Set(float64(agent.StartTime.UnixMilli()))
|
||||
agentDuration.
|
||||
With(prometheus.Labels{"agent_guid": agent.Guid}).
|
||||
Set(float64(time.Now().Sub(agent.StartTime).Seconds()))
|
||||
}
|
||||
for _, agent := range disconnectedAgents {
|
||||
removeAgentMetrics(agent)
|
||||
@ -233,23 +240,17 @@ func updateMetricsImpl(state *models.State) {
|
||||
clientCount.Set(float64(len(state.Clients)))
|
||||
|
||||
// with this app
|
||||
disconnectedClients := make(map[string]*models.Client)
|
||||
disconnectedClients := make(map[string]models.Client)
|
||||
for k, v := range lastClients {
|
||||
disconnectedClients[k] = v
|
||||
}
|
||||
for _, client := range state.Clients {
|
||||
if lastClients[client.Guid] == nil {
|
||||
if _, ok := lastClients[client.Guid]; !ok {
|
||||
cumulativeClientCount.Inc()
|
||||
}
|
||||
delete(disconnectedClients, client.Guid)
|
||||
clientGuids[client.Guid] = &client
|
||||
clientGuids[client.Guid] = client
|
||||
clientActive(client)
|
||||
clientStartTime.
|
||||
With(prometheus.Labels{"client_guid": client.Guid}).
|
||||
Set(float64(client.StartTime.UnixMilli()))
|
||||
clientDuration.
|
||||
With(prometheus.Labels{"client_guid": client.Guid}).
|
||||
Set(float64(time.Now().Sub(client.StartTime).Seconds()))
|
||||
}
|
||||
for _, client := range disconnectedClients {
|
||||
removeClientMetrics(client)
|
||||
@ -257,8 +258,12 @@ func updateMetricsImpl(state *models.State) {
|
||||
lastClients = clientGuids
|
||||
}
|
||||
|
||||
func removeAgentMetrics(agent *models.Agent) {
|
||||
ok1 := agentInfo.Delete(agentLabels(*agent))
|
||||
func removeAgentInfoMetrics(agent models.Agent) bool {
|
||||
return agentInfo.Delete(agentLabels(agent))
|
||||
}
|
||||
|
||||
func removeAgentMetrics(agent models.Agent) {
|
||||
ok1 := removeAgentInfoMetrics(agent)
|
||||
guidLabels := prometheus.Labels{"agent_guid": agent.Guid}
|
||||
ok2 := agentStartTime.Delete(guidLabels)
|
||||
// delayed deletion of the duration so that we are sure prometheus has scraped the last data.
|
||||
@ -270,12 +275,17 @@ func removeAgentMetrics(agent *models.Agent) {
|
||||
}
|
||||
}()
|
||||
if !ok1 || !ok2 {
|
||||
log.Printf("Could not delete all timeseries for agent %s", agent.Guid)
|
||||
log.Printf("Could not delete all timeseries for agent %s (info %v, starttime %v) ",
|
||||
agent.Guid, ok1, ok2)
|
||||
}
|
||||
}
|
||||
|
||||
func removeClientMetrics(client *models.Client) {
|
||||
ok1 := clientInfo.Delete(clientLabels(*client))
|
||||
func removeClientInfoMetrics(client models.Client) bool {
|
||||
return clientInfo.Delete(clientLabels(client))
|
||||
}
|
||||
|
||||
func removeClientMetrics(client models.Client) {
|
||||
ok1 := removeClientInfoMetrics(client)
|
||||
guidLabels := prometheus.Labels{"client_guid": client.Guid}
|
||||
ok2 := clientStartTime.Delete(guidLabels)
|
||||
// delayed deletion of the duration so that we are sure prometheus has scraped the last data.
|
||||
@ -287,6 +297,6 @@ func removeClientMetrics(client *models.Client) {
|
||||
}
|
||||
}()
|
||||
if !ok1 || !ok2 {
|
||||
log.Printf("Could not delete all timeseries for client %s", client.Guid)
|
||||
log.Printf("Could not delete all timeseries for client %s (info %v, starttime %v)", client.Guid, ok1, ok2)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user