gRPC is the backbone of modern microservice architectures — high-performance, type-safe, and language-agnostic. But its binary HTTP/2 framing means most uptime monitoring tools can't probe it directly. A curl to a gRPC port returns a 400 or a garbled binary response. Standard HTTP monitors bounce off without telling you anything useful.
Vigilmon works with gRPC services through two complementary approaches: the gRPC Health Checking Protocol, exposed to HTTP probes through a gRPC-Gateway, and a traditional HTTP health endpoint served alongside your gRPC server. This tutorial covers both, plus heartbeat monitoring for gRPC background processors.
The gRPC Health Checking Protocol
The gRPC ecosystem defines a standard health check service in grpc.health.v1:
// grpc/health/v1/health.proto
syntax = "proto3";
package grpc.health.v1;
// HealthCheckRequest names the service whose health is being queried.
message HealthCheckRequest {
string service = 1;
}
// HealthCheckResponse carries the serving status of the queried service.
message HealthCheckResponse {
enum ServingStatus {
// Status cannot be determined.
UNKNOWN = 0;
// Service is healthy and ready.
SERVING = 1;
// Service is running but not ready.
NOT_SERVING = 2;
// No such service registered.
SERVICE_UNKNOWN = 3;
}
ServingStatus status = 1;
}
// Health is the standard health-checking service.
service Health {
// Check returns a single HealthCheckResponse for the request.
rpc Check(HealthCheckRequest) returns (HealthCheckResponse);
// Watch returns a stream of HealthCheckResponse messages for the request.
rpc Watch(HealthCheckRequest) returns (stream HealthCheckResponse);
}
Most gRPC libraries ship a ready-made implementation. The status values mean:
| Status | Meaning | HTTP Equivalent |
|---|---|---|
| SERVING | Service is healthy and ready | 200 OK |
| NOT_SERVING | Service is running but not ready | 503 |
| UNKNOWN | Status cannot be determined | 503 |
| SERVICE_UNKNOWN | No such service registered | 404 |
Step 1: Implement gRPC Health Checks in Your Service
Go
// main.go
package main
import (
"context"
"net"
"net/http"
"log"
"os"
"os/signal"
"google.golang.org/grpc"
"google.golang.org/grpc/health"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/reflection"
)
// main starts a gRPC server on :50051 with the application service, the
// standard gRPC health service, and server reflection registered.
//
// It blocks until the server stops. On an interrupt signal it first reports
// every service as NOT_SERVING and then drains in-flight RPCs before exiting.
func main() {
	grpcServer := grpc.NewServer()

	// Register your actual service
	pb.RegisterMyServiceServer(grpcServer, &myServiceServer{})

	// Register gRPC health check
	healthSrv := health.NewServer()
	healthpb.RegisterHealthServer(grpcServer, healthSrv)

	// Mark your service as SERVING
	healthSrv.SetServingStatus("myservice.MyService", healthpb.HealthCheckResponse_SERVING)
	// Optional: mark the overall server (empty string = all services)
	healthSrv.SetServingStatus("", healthpb.HealthCheckResponse_SERVING)

	reflection.Register(grpcServer)

	// A failed listen must abort startup: passing a nil listener to Serve
	// would otherwise crash with a far less helpful message.
	lis, err := net.Listen("tcp", ":50051")
	if err != nil {
		log.Fatalf("listen on :50051: %v", err)
	}

	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
	go func() {
		<-ctx.Done()
		// Restore default signal handling so a second interrupt force-quits.
		stop()
		// Flip health to NOT_SERVING before draining so probes see the
		// shutdown while in-flight RPCs are still completing.
		healthSrv.Shutdown()
		grpcServer.GracefulStop()
	}()

	log.Println("gRPC server listening on :50051")
	if err := grpcServer.Serve(lis); err != nil {
		log.Fatalf("gRPC server stopped: %v", err)
	}
}
Node.js (@grpc/grpc-js)
const grpc = require('@grpc/grpc-js');
const health = require('grpc-health-check');

// Initial health status per service name; the empty string is the entry
// used for the server as a whole.
const healthImpl = new health.Implementation({
  'myservice.MyService': health.servingStatus.SERVING,
  '': health.servingStatus.SERVING,
});

const server = new grpc.Server();
server.addService(health.service, healthImpl);
server.addService(myServiceDef, myServiceImpl);

server.bindAsync(
  '0.0.0.0:50051',
  grpc.ServerCredentials.createInsecure(),
  (err, port) => {
    // A failed bind (e.g. port already in use) must stop startup here;
    // calling start() on an unbound server only fails later and less clearly.
    if (err) {
      console.error(`Failed to bind gRPC server: ${err.message}`);
      process.exit(1);
    }
    server.start();
    console.log(`gRPC server listening on port ${port}`);
  }
);

// Update status dynamically when dependencies change
/**
 * Pings the database and publishes the result as the health status of
 * 'myservice.MyService': SERVING when the ping resolves, NOT_SERVING when
 * it rejects.
 */
async function checkDependencies() {
  try {
    await db.ping();
    healthImpl.setStatus('myservice.MyService', health.servingStatus.SERVING);
  } catch {
    healthImpl.setStatus('myservice.MyService', health.servingStatus.NOT_SERVING);
  }
}

// Re-evaluate dependencies every 10 seconds.
setInterval(checkDependencies, 10_000);
Python (grpcio-health-checking)
# server.py
from concurrent import futures
import grpc
from grpc_health.v1 import health, health_pb2, health_pb2_grpc
def serve():
    """Run the gRPC server on port 50051 and block until it terminates.

    Registers the application servicer and the standard health servicer,
    then marks both "myservice.MyService" and the overall server ("") as
    SERVING before the server starts accepting connections.
    """
    executor = futures.ThreadPoolExecutor(max_workers=10)
    server = grpc.server(executor)

    # Application service.
    pb2_grpc.add_MyServiceServicer_to_server(MyServiceServicer(), server)

    # Health service.
    health_servicer = health.HealthServicer()
    health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server)

    # Named service first, then the empty name for the server as a whole.
    serving = health_pb2.HealthCheckResponse.SERVING
    for service_name in ("myservice.MyService", ""):
        health_servicer.set(service_name, serving)

    server.add_insecure_port("[::]:50051")
    server.start()
    server.wait_for_termination()
Test your health check with grpcurl:
# Install: brew install grpcurl or go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
grpcurl -plaintext localhost:50051 grpc.health.v1.Health/Check
# {
# "status": "SERVING"
# }
# Check a specific service
grpcurl -plaintext -d '{"service":"myservice.MyService"}' \
localhost:50051 grpc.health.v1.Health/Check
Step 2: Expose an HTTP Health Endpoint via gRPC-Gateway
Vigilmon's HTTP probes cannot speak the gRPC wire protocol natively. The cleanest solution is to run a gRPC-Gateway sidecar that translates HTTP/1.1 requests to gRPC calls, or to add a parallel HTTP server to your gRPC service.
Option A: Parallel HTTP Server (Simplest)
Add a lightweight HTTP server alongside your gRPC server that checks gRPC health internally:
// http_health.go — run alongside your gRPC server
package main
import (
	"context"
	"encoding/json"
	"log"
	"net/http"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
)
// startHTTPHealthServer connects to the gRPC server at grpcAddr and serves an
// HTTP /health endpoint on :8080 that mirrors the gRPC health status of
// "myservice.MyService".
//
// The endpoint answers 200 with {"status":"ok"} when the service reports
// SERVING, and 503 with {"status":"down"} otherwise (including when the
// health RPC itself fails). The function panics if the gRPC connection cannot
// be established within 5 seconds; the HTTP server runs in a background
// goroutine and the function returns immediately after starting it.
func startHTTPHealthServer(grpcAddr string) {
	// Bound the blocking dial with a context deadline; this replaces the
	// deprecated grpc.WithTimeout dial option.
	dialCtx, cancelDial := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancelDial()

	conn, err := grpc.DialContext(dialCtx, grpcAddr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithBlock(),
	)
	if err != nil {
		panic(err)
	}
	healthClient := healthpb.NewHealthClient(conn)

	// writeJSON sends body with the given status code and logs encode failures.
	writeJSON := func(w http.ResponseWriter, code int, body map[string]string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(code)
		if err := json.NewEncoder(w).Encode(body); err != nil {
			log.Printf("writing /health response: %v", err)
		}
	}

	http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		ctx, cancel := context.WithTimeout(r.Context(), 3*time.Second)
		defer cancel()

		resp, err := healthClient.Check(ctx, &healthpb.HealthCheckRequest{
			Service: "myservice.MyService",
		})
		if err != nil {
			// The response body reports NOT_SERVING in this case too, so the
			// log is the only place the underlying RPC failure is visible.
			log.Printf("gRPC health check RPC failed: %v", err)
		}

		if err != nil || resp.Status != healthpb.HealthCheckResponse_SERVING {
			status := "NOT_SERVING"
			if resp != nil {
				status = resp.Status.String()
			}
			writeJSON(w, http.StatusServiceUnavailable, map[string]string{
				"status":      "down",
				"grpc_status": status,
			})
			return
		}

		writeJSON(w, http.StatusOK, map[string]string{
			"status":      "ok",
			"grpc_status": "SERVING",
		})
	})

	go func() {
		// Surface listener failures (e.g. port in use) instead of silently
		// running without a health endpoint.
		if err := http.ListenAndServe(":8080", nil); err != nil {
			log.Printf("HTTP health server stopped: %v", err)
		}
	}()
}
Option B: gRPC-Gateway Transcoding
If you use Protocol Buffers for your service definition, gRPC-Gateway generates a reverse proxy that translates REST calls to gRPC:
# Install gRPC-Gateway
go get github.com/grpc-ecosystem/grpc-gateway/v2/runtime
// gateway.go
package main
import (
"context"
"net/http"
"github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
pb "your-module/gen/pb"
)
// runGateway serves a gRPC-Gateway reverse proxy on :8080 that forwards REST
// calls to the gRPC server at localhost:50051 and adds a GET /health endpoint
// backed by the gRPC health service.
//
// It returns an error if the service handler or the health route cannot be
// registered, if the health-check connection cannot be created, or when the
// HTTP server stops.
func runGateway() error {
	ctx := context.Background()
	mux := runtime.NewServeMux()
	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	}

	// Register your service
	if err := pb.RegisterMyServiceHandlerFromEndpoint(ctx, mux, "localhost:50051", opts); err != nil {
		return err
	}

	// Create one connection for all health checks instead of dialing (and
	// discarding the dial error) on every request.
	conn, err := grpc.Dial("localhost:50051", opts...)
	if err != nil {
		return err
	}
	defer conn.Close()
	client := healthpb.NewHealthClient(conn)

	// Add a custom health endpoint that checks gRPC health
	err = mux.HandlePath("GET", "/health", func(w http.ResponseWriter, r *http.Request, _ map[string]string) {
		w.Header().Set("Content-Type", "application/json")

		// Use the request context so the RPC is cancelled when the prober
		// disconnects or gives up.
		resp, err := client.Check(r.Context(), &healthpb.HealthCheckRequest{})
		if err != nil || resp.Status != healthpb.HealthCheckResponse_SERVING {
			w.WriteHeader(http.StatusServiceUnavailable)
			w.Write([]byte(`{"status":"down"}`))
			return
		}
		w.Write([]byte(`{"status":"ok","grpc_status":"SERVING"}`))
	})
	if err != nil {
		return err
	}

	return http.ListenAndServe(":8080", mux)
}
Verify the HTTP endpoint:
curl -i http://localhost:8080/health
# HTTP/1.1 200 OK
# {"status":"ok","grpc_status":"SERVING"}
Step 3: Configure Vigilmon HTTP Monitor for gRPC Services
With your HTTP health endpoint running, configure Vigilmon:
- Log in to vigilmon.online and go to Monitors → New Monitor
- Choose HTTP / HTTPS
- URL: https://your-grpc-service.example.com/health
- Check interval: 1 minute
- Expected response:
  - Status code: 200
  - Response body contains: "status":"ok"
  - Response time threshold: 2000ms
- Assign alert channels
- Save
Differentiating SERVING vs NOT_SERVING vs UNKNOWN
Refine your health endpoint to map gRPC statuses to distinct HTTP response codes:
// grpcStatusToHTTP maps a gRPC health status to the HTTP status code and the
// status label the health endpoint should report.
//
// SERVING maps to 200, NOT_SERVING and UNKNOWN to 503, and SERVICE_UNKNOWN to
// 404. Any value outside the known enum is reported as 503 "UNKNOWN", since
// its meaning cannot be determined.
func grpcStatusToHTTP(s healthpb.HealthCheckResponse_ServingStatus) (int, string) {
	switch s {
	case healthpb.HealthCheckResponse_SERVING:
		return 200, "SERVING"
	case healthpb.HealthCheckResponse_NOT_SERVING:
		return 503, "NOT_SERVING"
	case healthpb.HealthCheckResponse_UNKNOWN:
		return 503, "UNKNOWN"
	case healthpb.HealthCheckResponse_SERVICE_UNKNOWN:
		// No such service registered: a configuration problem, not an outage.
		return 404, "SERVICE_UNKNOWN"
	default:
		// Unrecognised enum value; do not mislabel it as SERVICE_UNKNOWN.
		return 503, "UNKNOWN"
	}
}
This lets Vigilmon's body check distinguish between "service is explicitly NOT_SERVING" and "the health RPC itself failed" — two very different root causes requiring different runbook actions.
Step 4: Heartbeat Monitoring for gRPC Background Processors
gRPC streaming RPCs, event processors, and background queue consumers run continuously and cannot be probed by HTTP monitors. If a bidirectional stream drops, a queue consumer crashes, or a scheduled gRPC job fails, there's no endpoint to probe.
Vigilmon heartbeat monitors handle this: your processor pings Vigilmon after each successful processing cycle, and Vigilmon alerts if the pings stop.
Set Up the Heartbeat Monitor
- Monitors → New Monitor → Heartbeat
- Name: grpc-event-processor
- Expected interval: 5 minutes
- Grace period: 10 minutes
- Save — copy the heartbeat URL
Wire Into a gRPC Stream Consumer
// processor.go — gRPC streaming consumer with Vigilmon heartbeat
package main
import (
"context"
"log"
"net/http"
"os"
"time"
pb "your-module/gen/pb"
"google.golang.org/grpc"
)
// heartbeatClient is the shared HTTP client for heartbeat pings. The timeout
// bounds each ping so a stalled request cannot block the caller indefinitely.
var heartbeatClient = &http.Client{Timeout: 10 * time.Second}

// sendHeartbeat pings the Vigilmon heartbeat URL with a GET request.
//
// It is best-effort: an empty URL, a transport failure, and a non-2xx response
// are each logged and otherwise ignored, so a monitoring problem never
// interrupts the caller.
func sendHeartbeat(url string) {
	if url == "" {
		log.Print("Vigilmon heartbeat skipped: no URL configured")
		return
	}

	resp, err := heartbeatClient.Get(url)
	if err != nil {
		log.Printf("Vigilmon heartbeat failed: %v", err)
		return
	}
	defer resp.Body.Close()

	// A response that is not 2xx is logged rather than passing silently.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		log.Printf("Vigilmon heartbeat rejected: %s", resp.Status)
	}
}
// runProcessor consumes the event stream from client, handing each event to
// processEvent, and pings the Vigilmon heartbeat URL (read from the
// VIGILMON_HEARTBEAT_URL environment variable) once more than five minutes
// have passed since the previous ping.
//
// The process exits via log.Fatalf if the stream cannot be opened. The
// function returns after logging the first receive error.
func runProcessor(client pb.EventServiceClient) {
	url := os.Getenv("VIGILMON_HEARTBEAT_URL")
	lastBeat := time.Now()

	req := &pb.StreamEventsRequest{}
	stream, openErr := client.StreamEvents(context.Background(), req)
	if openErr != nil {
		log.Fatalf("Failed to open stream: %v", openErr)
	}

	for {
		ev, recvErr := stream.Recv()
		if recvErr != nil {
			log.Printf("Stream error: %v — reconnecting", recvErr)
			return
		}

		processEvent(ev)

		// Heartbeats are rate-limited: skip until the interval has elapsed.
		if time.Since(lastBeat) <= 5*time.Minute {
			continue
		}
		sendHeartbeat(url)
		lastBeat = time.Now()
	}
}
Python gRPC Consumer with Heartbeat
import grpc
import requests
import os
import time
import your_pb2_grpc as pb2_grpc
import your_pb2 as pb2
HEARTBEAT_URL = os.environ["VIGILMON_HEARTBEAT_URL"]
HEARTBEAT_INTERVAL = 300 # 5 minutes
def process_events(stub):
    """Consume the event stream from ``stub`` and ping the heartbeat URL.

    Each received event is passed to ``process_event``. After an event is
    handled, a heartbeat GET is sent if more than ``HEARTBEAT_INTERVAL``
    seconds have passed since the last successful ping. A failed ping is
    printed and retried after the next event.
    """
    last_heartbeat = time.time()
    request = pb2.StreamEventsRequest()
    for event in stub.StreamEvents(request):
        process_event(event)

        elapsed = time.time() - last_heartbeat
        if elapsed <= HEARTBEAT_INTERVAL:
            continue

        try:
            requests.get(HEARTBEAT_URL, timeout=5)
        except Exception as e:
            print(f"Heartbeat failed: {e}")
        else:
            # Only a ping that did not raise resets the timer.
            last_heartbeat = time.time()
def main():
    """Run the event consumer forever, reopening the stream when it ends.

    Opens an insecure channel to localhost:50051 and repeatedly calls
    ``process_events``. gRPC errors are printed. After any stream
    termination, whether an error or a clean end, the loop waits 5 seconds
    before reopening the stream.
    """
    channel = grpc.insecure_channel("localhost:50051")
    stub = pb2_grpc.EventServiceStub(channel)
    while True:
        try:
            process_events(stub)
            print("gRPC stream ended — reconnecting")
        except grpc.RpcError as e:
            print(f"gRPC error: {e.code()} — {e.details()}")
        # Back off on every iteration: a stream the server closes cleanly
        # would otherwise be reopened in a tight loop.
        time.sleep(5)  # backoff before reconnect
Step 5: Multi-Service gRPC Monitoring
For a microservices architecture with multiple gRPC services, create one monitor per service:
| Monitor Name | URL | Alert Priority |
|---|---|---|
| [grpc] auth-service /health | https://auth.internal/health | P1 |
| [grpc] user-service /health | https://users.internal/health | P1 |
| [grpc] notification-service /health | https://notify.internal/health | P2 |
| [grpc] event-processor heartbeat | heartbeat (5 min) | P2 |
| [grpc] report-processor heartbeat | heartbeat (1 hr) | P2 |
Group all gRPC service monitors into a Status Page in Vigilmon for a consolidated view. During an incident, you'll see at a glance whether one service is down or whether the entire gRPC layer is affected.
Summary
gRPC services are not directly probeable by HTTP monitors, but a thin HTTP health adapter unlocks full external monitoring:
| Step | What It Adds |
|---|---|
| gRPC Health Check Protocol | Standard SERVING/NOT_SERVING status per service |
| HTTP sidecar or gRPC-Gateway | HTTP endpoint Vigilmon can probe |
| Vigilmon HTTP monitor | External reachability, TLS, latency monitoring |
| Vigilmon heartbeat monitor | Continuous stream processor and batch job liveness |
The entire setup takes under 30 minutes per service. Start with a free account at vigilmon.online — no credit card required.