-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy path210-pg_repl.yml
327 lines (323 loc) · 18.1 KB
/
210-pg_repl.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
##
# SYNOPSIS
# pg_repl.pg_repl_12_*
#
# DESCRIPTION
# PostgreSQL replication stat metrics 12+
#
# OPTIONS
# Tags [cluster]
# TTL 10
# Priority 0
# Timeout 100ms
# Fatal false
# Version 120000 ~ higher
# Source 210-pg_repl.yml
#
# METRICS
# appname (LABEL)
# Name of the application that is connected to this WAL sender
# usename (LABEL)
# Name of the user logged into this WAL sender process
# address (LABEL)
# IP address of the client connected to this WAL sender, localhost for unix socket
# pid (LABEL)
# Process ID of the WAL sender process
# client_port (GAUGE)
# TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used
# state (GAUGE)
# Current WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping
# sync_state (GAUGE)
# Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum
# sync_priority (GAUGE)
# Priority of this standby server for being chosen as the synchronous standby
# backend_xmin (COUNTER)
# This standby's xmin horizon reported by hot_standby_feedback.
# lsn (COUNTER)
# Current log position on this server
# sent_diff (GAUGE)
# Last log position sent to this standby server diff with current lsn
# write_diff (GAUGE)
# Last log position written to disk by this standby server diff with current lsn
# flush_diff (GAUGE)
# Last log position flushed to disk by this standby server diff with current lsn
# replay_diff (GAUGE)
# Last log position replayed into the database on this standby server diff with current lsn
# sent_lsn (COUNTER)
# Last write-ahead log location sent on this connection
# write_lsn (COUNTER)
# Last write-ahead log location written to disk by this standby server
# flush_lsn (COUNTER)
# Last write-ahead log location flushed to disk by this standby server
# replay_lsn (COUNTER)
# Last write-ahead log location replayed into the database on this standby server
# write_lag (GAUGE)
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it
# flush_lag (GAUGE)
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it
# replay_lag (GAUGE)
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it
# time (COUNTER)
# Current timestamp in unix epoch
# launch_time (COUNTER)
# Time when this process was started, i.e., when the client connected to this WAL sender
# reply_time (GAUGE)
# Send time of last reply message received from standby server
#
pg_repl_12:
# Replication collector for PostgreSQL 12+: one result row per pg_stat_replication
# entry (i.e. per WAL sender), labelled by appname, usename, address and pid.
# Same query as pg_repl_10_11 with one extra column, reply_time.
name: pg_repl
desc: PostgreSQL replication stat metrics 12+
# Query notes:
# - The sub-select aliased "current" returns a single row, so the comma join only
#   attaches the local reference LSN to every WAL sender row: the last replayed
#   LSN while pg_is_in_recovery() is true, the current WAL LSN otherwise.
# - A NULL client_addr is reported as the label value 'localhost'; pid is cast to
#   TEXT and exported as a label.
# - state / sync_state are mapped to the integer codes listed in their
#   descriptions; any value not listed there maps to -1.
# - Every LSN is exported as its distance from '0/0'. The *_diff columns are the
#   reference LSN minus the standby's LSN and are not coalesced, so they stay NULL
#   whenever the corresponding standby LSN is NULL.
# - *_lag intervals are converted with extract(EPOCH ...) and coalesced to 0, so a
#   NULL lag cannot be told apart from a zero lag in the output.
# - launch_time and reply_time are not coalesced; a NULL reply_time stays NULL.
query: |
SELECT application_name AS appname, usename, coalesce(client_addr::TEXT,'localhost') AS address, pid::TEXT, client_port,
CASE state WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state,
CASE sync_state WHEN 'async' THEN 0 WHEN 'potential' THEN 1 WHEN 'sync' THEN 2 WHEN 'quorum' THEN 3 ELSE -1 END AS sync_state,
sync_priority, backend_xmin::TEXT::BIGINT AS backend_xmin, current.lsn - '0/0' AS lsn,
current.lsn - sent_lsn AS sent_diff, current.lsn - write_lsn AS write_diff, current.lsn - flush_lsn AS flush_diff, current.lsn - replay_lsn AS replay_diff,
sent_lsn - '0/0' AS sent_lsn, write_lsn - '0/0' AS write_lsn, flush_lsn - '0/0' AS flush_lsn, replay_lsn - '0/0' AS replay_lsn,
coalesce(extract(EPOCH FROM write_lag), 0) AS write_lag, coalesce(extract(EPOCH FROM flush_lag), 0) AS flush_lag, coalesce(extract(EPOCH FROM replay_lag), 0) AS replay_lag,
extract(EPOCH FROM current_timestamp) AS "time", extract(EPOCH FROM backend_start) AS launch_time, extract(EPOCH FROM reply_time) AS reply_time
FROM pg_stat_replication, (SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END AS lsn) current;
ttl: 10
# Lower bound of the supported server version range (the header lists it as
# "120000 ~ higher"); no max_version is set for this collector.
min_version: 120000
tags:
- cluster
# Each entry below names one column of the query result and declares how it is
# exported (LABEL, GAUGE or COUNTER) together with its help text.
metrics:
- appname:
usage: LABEL
description: Name of the application that is connected to this WAL sender
- usename:
usage: LABEL
description: Name of the user logged into this WAL sender process
- address:
usage: LABEL
description: IP address of the client connected to this WAL sender, localhost for unix socket
# IP address of the client connected to this WAL sender. If this field is null, it indicates that the client is connected via a Unix socket on the server machine.
- pid:
usage: LABEL
description: Process ID of the WAL sender process
- client_port:
usage: GAUGE
description: TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used
- state:
usage: GAUGE
description: Current WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping
# Current WAL sender state. Possible values are: streaming|startup|catchup|backup|stopping
# The query emits -1 for any state outside this list.
- sync_state:
usage: GAUGE
description: Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum
# Synchronous state of this standby server. Possible values are: async|potential|sync|quorum
# The query emits -1 for any sync_state outside this list.
- sync_priority:
usage: GAUGE
description: Priority of this standby server for being chosen as the synchronous standby
# Priority of this standby server for being chosen as the synchronous standby in a priority-based synchronous replication. This has no effect in a quorum-based synchronous replication.
- backend_xmin:
usage: COUNTER
description: This standby's xmin horizon reported by hot_standby_feedback.
- lsn:
usage: COUNTER
description: Current log position on this server
- sent_diff:
usage: GAUGE
description: Last log position sent to this standby server diff with current lsn
- write_diff:
usage: GAUGE
description: Last log position written to disk by this standby server diff with current lsn
- flush_diff:
usage: GAUGE
description: Last log position flushed to disk by this standby server diff with current lsn
- replay_diff:
usage: GAUGE
description: Last log position replayed into the database on this standby server diff with current lsn
- sent_lsn:
usage: COUNTER
description: Last write-ahead log location sent on this connection
- write_lsn:
usage: COUNTER
description: Last write-ahead log location written to disk by this standby server
- flush_lsn:
usage: COUNTER
description: Last write-ahead log location flushed to disk by this standby server
- replay_lsn:
usage: COUNTER
description: Last write-ahead log location replayed into the database on this standby server
- write_lag:
usage: GAUGE
description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (but not yet flushed it or applied it). This can be used to gauge the delay that synchronous_commit level remote_write incurred while committing if this server was configured as a synchronous standby.
- flush_lag:
usage: GAUGE
description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (but not yet applied it). This can be used to gauge the delay that synchronous_commit level on incurred while committing if this server was configured as a synchronous standby.
- replay_lag:
usage: GAUGE
description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it. This can be used to gauge the delay that synchronous_commit level remote_apply incurred while committing if this server was configured as a synchronous standby.
- time:
usage: COUNTER
description: Current timestamp in unix epoch
- launch_time:
usage: COUNTER
description: Time when this process was started, i.e., when the client connected to this WAL sender
- reply_time:
usage: GAUGE
description: Send time of last reply message received from standby server
# Send time of last reply message received from standby server
##
# SYNOPSIS
# pg_repl.pg_repl_10_11_*
#
# DESCRIPTION
# PostgreSQL replication stat metrics v10 v11
#
# OPTIONS
# Tags [cluster]
# TTL 10
# Priority 0
# Timeout 100ms
# Fatal false
# Version 100000 ~ 120000
# Source 210-pg_repl.yml
#
# METRICS
# appname (LABEL)
# Name of the application that is connected to this WAL sender
# usename (LABEL)
# Name of the user logged into this WAL sender process
# address (LABEL)
# IP address of the client connected to this WAL sender, localhost for unix socket
# pid (LABEL)
# Process ID of the WAL sender process
# client_port (GAUGE)
# TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used
# state (GAUGE)
# Current WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping
# sync_state (GAUGE)
# Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum
# sync_priority (GAUGE)
# Priority of this standby server for being chosen as the synchronous standby
# backend_xmin (COUNTER)
# This standby's xmin horizon reported by hot_standby_feedback.
# lsn (COUNTER)
# Current log position on this server
# sent_diff (GAUGE)
# Last log position sent to this standby server diff with current lsn
# write_diff (GAUGE)
# Last log position written to disk by this standby server diff with current lsn
# flush_diff (GAUGE)
# Last log position flushed to disk by this standby server diff with current lsn
# replay_diff (GAUGE)
# Last log position replayed into the database on this standby server diff with current lsn
# sent_lsn (COUNTER)
# Last write-ahead log location sent on this connection
# write_lsn (COUNTER)
# Last write-ahead log location written to disk by this standby server
# flush_lsn (COUNTER)
# Last write-ahead log location flushed to disk by this standby server
# replay_lsn (COUNTER)
# Last write-ahead log location replayed into the database on this standby server
# write_lag (GAUGE)
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it
# flush_lag (GAUGE)
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it
# replay_lag (GAUGE)
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it
# time (COUNTER)
# Current timestamp in unix epoch
# launch_time (COUNTER)
# Time when this process was started, i.e., when the client connected to this WAL sender
#
pg_repl_10_11:
# Replication collector for PostgreSQL 10 and 11: one result row per
# pg_stat_replication entry (i.e. per WAL sender), labelled by appname, usename,
# address and pid. Same query as pg_repl_12 without the reply_time column
# (presumably because that column only exists from PostgreSQL 12 - confirm).
name: pg_repl
desc: PostgreSQL replication stat metrics v10 v11
# Query notes:
# - The sub-select aliased "current" returns a single row, so the comma join only
#   attaches the local reference LSN to every WAL sender row: the last replayed
#   LSN while pg_is_in_recovery() is true, the current WAL LSN otherwise.
# - A NULL client_addr is reported as the label value 'localhost'; pid is cast to
#   TEXT and exported as a label.
# - state / sync_state are mapped to the integer codes listed in their
#   descriptions; any value not listed there maps to -1.
# - Every LSN is exported as its distance from '0/0'. The *_diff columns are the
#   reference LSN minus the standby's LSN and are not coalesced, so they stay NULL
#   whenever the corresponding standby LSN is NULL.
# - *_lag intervals are converted with extract(EPOCH ...) and coalesced to 0, so a
#   NULL lag cannot be told apart from a zero lag in the output.
query: |
SELECT application_name AS appname, usename, coalesce(client_addr::TEXT,'localhost') AS address, pid::TEXT, client_port,
CASE state WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state,
CASE sync_state WHEN 'async' THEN 0 WHEN 'potential' THEN 1 WHEN 'sync' THEN 2 WHEN 'quorum' THEN 3 ELSE -1 END AS sync_state,
sync_priority, backend_xmin::TEXT::BIGINT AS backend_xmin, current.lsn - '0/0' AS lsn,
current.lsn - sent_lsn AS sent_diff, current.lsn - write_lsn AS write_diff, current.lsn - flush_lsn AS flush_diff, current.lsn - replay_lsn AS replay_diff,
sent_lsn - '0/0' AS sent_lsn, write_lsn - '0/0' AS write_lsn, flush_lsn - '0/0' AS flush_lsn, replay_lsn - '0/0' AS replay_lsn,
coalesce(extract(EPOCH FROM write_lag), 0) AS write_lag, coalesce(extract(EPOCH FROM flush_lag), 0) AS flush_lag, coalesce(extract(EPOCH FROM replay_lag), 0) AS replay_lag,
extract(EPOCH FROM current_timestamp) AS "time", extract(EPOCH FROM backend_start) AS launch_time
FROM pg_stat_replication, (SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END AS lsn) current;
ttl: 10
# Supported server version range (the header lists it as "100000 ~ 120000").
# NOTE(review): max_version is presumably exclusive, since pg_repl_12 starts at
# the same value 120000 - confirm against the exporter's version matching rules.
min_version: 100000
max_version: 120000
tags:
- cluster
# Each entry below names one column of the query result and declares how it is
# exported (LABEL, GAUGE or COUNTER) together with its help text.
metrics:
- appname:
usage: LABEL
description: Name of the application that is connected to this WAL sender
- usename:
usage: LABEL
description: Name of the user logged into this WAL sender process
- address:
usage: LABEL
description: IP address of the client connected to this WAL sender, localhost for unix socket
# IP address of the client connected to this WAL sender. If this field is null, it indicates that the client is connected via a Unix socket on the server machine.
- pid:
usage: LABEL
description: Process ID of the WAL sender process
- client_port:
usage: GAUGE
description: TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used
- state:
usage: GAUGE
description: Current WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping
# Current WAL sender state. Possible values are: streaming|startup|catchup|backup|stopping
# The query emits -1 for any state outside this list.
- sync_state:
usage: GAUGE
description: Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum
# Synchronous state of this standby server. Possible values are: async|potential|sync|quorum
# The query emits -1 for any sync_state outside this list.
- sync_priority:
usage: GAUGE
description: Priority of this standby server for being chosen as the synchronous standby
# Priority of this standby server for being chosen as the synchronous standby in a priority-based synchronous replication. This has no effect in a quorum-based synchronous replication.
- backend_xmin:
usage: COUNTER
description: This standby's xmin horizon reported by hot_standby_feedback.
- lsn:
usage: COUNTER
description: Current log position on this server
- sent_diff:
usage: GAUGE
description: Last log position sent to this standby server diff with current lsn
- write_diff:
usage: GAUGE
description: Last log position written to disk by this standby server diff with current lsn
- flush_diff:
usage: GAUGE
description: Last log position flushed to disk by this standby server diff with current lsn
- replay_diff:
usage: GAUGE
description: Last log position replayed into the database on this standby server diff with current lsn
- sent_lsn:
usage: COUNTER
description: Last write-ahead log location sent on this connection
- write_lsn:
usage: COUNTER
description: Last write-ahead log location written to disk by this standby server
- flush_lsn:
usage: COUNTER
description: Last write-ahead log location flushed to disk by this standby server
- replay_lsn:
usage: COUNTER
description: Last write-ahead log location replayed into the database on this standby server
- write_lag:
usage: GAUGE
description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (but not yet flushed it or applied it). This can be used to gauge the delay that synchronous_commit level remote_write incurred while committing if this server was configured as a synchronous standby.
- flush_lag:
usage: GAUGE
description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (but not yet applied it). This can be used to gauge the delay that synchronous_commit level on incurred while committing if this server was configured as a synchronous standby.
- replay_lag:
usage: GAUGE
description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it
# Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it. This can be used to gauge the delay that synchronous_commit level remote_apply incurred while committing if this server was configured as a synchronous standby.
- time:
usage: COUNTER
description: Current timestamp in unix epoch
- launch_time:
usage: COUNTER
description: Time when this process was started, i.e., when the client connected to this WAL sender