Fixed subtle errors in the Prometheus code that were caused by the use
of pointers. Now using simple value objects.
This commit is contained in:
parent
813d398ea4
commit
f823d4b67b
@ -15,8 +15,8 @@ const NAMESPACE = "converge"
|
|||||||
var (
|
var (
|
||||||
// remember previous values of agent guids and clients so that we can increment
|
// remember previous values of agent guids and clients so that we can increment
|
||||||
// the cumulative counters.
|
// the cumulative counters.
|
||||||
lastAgents map[string]*models.Agent = make(map[string]*models.Agent)
|
lastAgents map[string]models.Agent = make(map[string]models.Agent)
|
||||||
lastClients map[string]*models.Client = make(map[string]*models.Client)
|
lastClients map[string]models.Client = make(map[string]models.Client)
|
||||||
|
|
||||||
cumulativeAgentCount = promauto.NewCounter(prometheus.CounterOpts{
|
cumulativeAgentCount = promauto.NewCounter(prometheus.CounterOpts{
|
||||||
Namespace: NAMESPACE,
|
Namespace: NAMESPACE,
|
||||||
@ -130,19 +130,32 @@ func clientLabels(client models.Client) prometheus.Labels {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func agentActive(agent models.Agent) {
|
func agentActive(agent models.Agent) {
|
||||||
prevAgent := lastAgents[agent.Guid]
|
prevAgent, ok := lastAgents[agent.Guid]
|
||||||
if prevAgent != nil && *prevAgent != agent {
|
if ok && prevAgent != agent {
|
||||||
removeAgentMetrics(prevAgent)
|
removeAgentInfoMetrics(prevAgent)
|
||||||
}
|
}
|
||||||
agentInfo.With(agentLabels(agent)).Set(1)
|
agentInfo.With(agentLabels(agent)).Set(1)
|
||||||
|
agentStartTime.
|
||||||
|
With(prometheus.Labels{"agent_guid": agent.Guid}).
|
||||||
|
Set(float64(agent.StartTime.UnixMilli()))
|
||||||
|
agentDuration.
|
||||||
|
With(prometheus.Labels{"agent_guid": agent.Guid}).
|
||||||
|
Set(float64(time.Now().Sub(agent.StartTime).Seconds()))
|
||||||
}
|
}
|
||||||
|
|
||||||
func clientActive(client models.Client) {
|
func clientActive(client models.Client) {
|
||||||
prevClient := lastClients[client.Guid]
|
prevClient, ok := lastClients[client.Guid]
|
||||||
if prevClient != nil && *prevClient != client {
|
if ok && prevClient != client {
|
||||||
removeClientMetrics(prevClient)
|
removeClientInfoMetrics(prevClient)
|
||||||
}
|
}
|
||||||
clientInfo.With(clientLabels(client)).Set(1)
|
clientInfo.With(clientLabels(client)).Set(1)
|
||||||
|
|
||||||
|
clientStartTime.
|
||||||
|
With(prometheus.Labels{"client_guid": client.Guid}).
|
||||||
|
Set(float64(client.StartTime.UnixMilli()))
|
||||||
|
clientDuration.
|
||||||
|
With(prometheus.Labels{"client_guid": client.Guid}).
|
||||||
|
Set(float64(time.Now().Sub(client.StartTime).Seconds()))
|
||||||
}
|
}
|
||||||
|
|
||||||
func setupPrometheus(mux *http.ServeMux, notifications chan *models.State) {
|
func setupPrometheus(mux *http.ServeMux, notifications chan *models.State) {
|
||||||
@ -203,27 +216,21 @@ func updateMetricsImpl(state *models.State) {
|
|||||||
// so that is still possible to identify the client or agent even though some values might
|
// so that is still possible to identify the client or agent even though some values might
|
||||||
// become 0.
|
// become 0.
|
||||||
|
|
||||||
agentGuids := make(map[string]*models.Agent)
|
agentGuids := make(map[string]models.Agent)
|
||||||
clientGuids := make(map[string]*models.Client)
|
clientGuids := make(map[string]models.Client)
|
||||||
|
|
||||||
agentCount.Set(float64(len(state.Agents)))
|
agentCount.Set(float64(len(state.Agents)))
|
||||||
disconnectedAgents := make(map[string]*models.Agent)
|
disconnectedAgents := make(map[string]models.Agent)
|
||||||
for k, v := range lastAgents {
|
for k, v := range lastAgents {
|
||||||
disconnectedAgents[k] = v
|
disconnectedAgents[k] = v
|
||||||
}
|
}
|
||||||
for _, agent := range state.Agents {
|
for _, agent := range state.Agents {
|
||||||
if lastAgents[agent.Guid] == nil {
|
if _, ok := lastAgents[agent.Guid]; !ok {
|
||||||
cumulativeAgentCount.Inc()
|
cumulativeAgentCount.Inc()
|
||||||
}
|
}
|
||||||
delete(disconnectedAgents, agent.Guid)
|
delete(disconnectedAgents, agent.Guid)
|
||||||
agentGuids[agent.Guid] = &agent
|
agentGuids[agent.Guid] = agent
|
||||||
agentActive(agent)
|
agentActive(agent)
|
||||||
agentStartTime.
|
|
||||||
With(prometheus.Labels{"agent_guid": agent.Guid}).
|
|
||||||
Set(float64(agent.StartTime.UnixMilli()))
|
|
||||||
agentDuration.
|
|
||||||
With(prometheus.Labels{"agent_guid": agent.Guid}).
|
|
||||||
Set(float64(time.Now().Sub(agent.StartTime).Seconds()))
|
|
||||||
}
|
}
|
||||||
for _, agent := range disconnectedAgents {
|
for _, agent := range disconnectedAgents {
|
||||||
removeAgentMetrics(agent)
|
removeAgentMetrics(agent)
|
||||||
@ -233,23 +240,17 @@ func updateMetricsImpl(state *models.State) {
|
|||||||
clientCount.Set(float64(len(state.Clients)))
|
clientCount.Set(float64(len(state.Clients)))
|
||||||
|
|
||||||
// with this app
|
// with this app
|
||||||
disconnectedClients := make(map[string]*models.Client)
|
disconnectedClients := make(map[string]models.Client)
|
||||||
for k, v := range lastClients {
|
for k, v := range lastClients {
|
||||||
disconnectedClients[k] = v
|
disconnectedClients[k] = v
|
||||||
}
|
}
|
||||||
for _, client := range state.Clients {
|
for _, client := range state.Clients {
|
||||||
if lastClients[client.Guid] == nil {
|
if _, ok := lastClients[client.Guid]; !ok {
|
||||||
cumulativeClientCount.Inc()
|
cumulativeClientCount.Inc()
|
||||||
}
|
}
|
||||||
delete(disconnectedClients, client.Guid)
|
delete(disconnectedClients, client.Guid)
|
||||||
clientGuids[client.Guid] = &client
|
clientGuids[client.Guid] = client
|
||||||
clientActive(client)
|
clientActive(client)
|
||||||
clientStartTime.
|
|
||||||
With(prometheus.Labels{"client_guid": client.Guid}).
|
|
||||||
Set(float64(client.StartTime.UnixMilli()))
|
|
||||||
clientDuration.
|
|
||||||
With(prometheus.Labels{"client_guid": client.Guid}).
|
|
||||||
Set(float64(time.Now().Sub(client.StartTime).Seconds()))
|
|
||||||
}
|
}
|
||||||
for _, client := range disconnectedClients {
|
for _, client := range disconnectedClients {
|
||||||
removeClientMetrics(client)
|
removeClientMetrics(client)
|
||||||
@ -257,8 +258,12 @@ func updateMetricsImpl(state *models.State) {
|
|||||||
lastClients = clientGuids
|
lastClients = clientGuids
|
||||||
}
|
}
|
||||||
|
|
||||||
func removeAgentMetrics(agent *models.Agent) {
|
func removeAgentInfoMetrics(agent models.Agent) bool {
|
||||||
ok1 := agentInfo.Delete(agentLabels(*agent))
|
return agentInfo.Delete(agentLabels(agent))
|
||||||
|
}
|
||||||
|
|
||||||
|
func removeAgentMetrics(agent models.Agent) {
|
||||||
|
ok1 := removeAgentInfoMetrics(agent)
|
||||||
guidLabels := prometheus.Labels{"agent_guid": agent.Guid}
|
guidLabels := prometheus.Labels{"agent_guid": agent.Guid}
|
||||||
ok2 := agentStartTime.Delete(guidLabels)
|
ok2 := agentStartTime.Delete(guidLabels)
|
||||||
// delayed deletion of the duration sow we are sure the prometheus has the last data.
|
// delayed deletion of the duration sow we are sure the prometheus has the last data.
|
||||||
@ -270,12 +275,17 @@ func removeAgentMetrics(agent *models.Agent) {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
if !ok1 || !ok2 {
|
if !ok1 || !ok2 {
|
||||||
log.Printf("Could not delete all timeseries for agent %s", agent.Guid)
|
log.Printf("Could not delete all timeseries for agent %s (info %v, starttime %v) ",
|
||||||
|
agent.Guid, ok1, ok2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func removeClientMetrics(client *models.Client) {
|
func removeClientInfoMetrics(client models.Client) bool {
|
||||||
ok1 := clientInfo.Delete(clientLabels(*client))
|
return clientInfo.Delete(clientLabels(client))
|
||||||
|
}
|
||||||
|
|
||||||
|
func removeClientMetrics(client models.Client) {
|
||||||
|
ok1 := removeClientInfoMetrics(client)
|
||||||
guidLabels := prometheus.Labels{"client_guid": client.Guid}
|
guidLabels := prometheus.Labels{"client_guid": client.Guid}
|
||||||
ok2 := clientStartTime.Delete(guidLabels)
|
ok2 := clientStartTime.Delete(guidLabels)
|
||||||
// delayed deletion of the duration sow we are sure the prometheus has the last data.
|
// delayed deletion of the duration sow we are sure the prometheus has the last data.
|
||||||
@ -287,6 +297,6 @@ func removeClientMetrics(client *models.Client) {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
if !ok1 || !ok2 {
|
if !ok1 || !ok2 {
|
||||||
log.Printf("Could not delete all timeseries for client %s", client.Guid)
|
log.Printf("Could not delete all timeseries for client %s (info %v, starttime %v)", client.Guid, ok1, ok2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user