Skip to content

Commit

Permalink
Merge pull request containers#180 from klihub/fixes/config-manager
Browse files Browse the repository at this point in the history
config-manager: don't crash if no runtime is found running.
  • Loading branch information
marquiz authored Oct 26, 2023
2 parents 2be40af + 8146d2f commit 8832f81
Showing 1 changed file with 40 additions and 17 deletions.
57 changes: 40 additions & 17 deletions cmd/config-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ import (
"bytes"
"context"
"fmt"
"log"
"os"
"time"

"github.com/coreos/go-systemd/v22/dbus"
tomlv2 "github.com/pelletier/go-toml/v2"
"github.com/sirupsen/logrus"
)

const (
Expand All @@ -38,11 +39,16 @@ const (
crioUnit = "crio.service"
)

var (
// log is the package-level logger used for all output in this file. It is
// logrus's standard logger, so calls such as log.Infof and log.Fatalf go
// through logrus.
log = logrus.StandardLogger()
)

func main() {
unit, err := detectRuntime()
unit, conn, err := detectRuntime()
if err != nil {
log.Fatalf("failed to autodetect container runtime: %v", err)
}
defer conn.Close()

switch unit {
case containerdUnit:
Expand All @@ -57,14 +63,28 @@ func main() {
log.Fatalf("error enabling NRI: %v", err)
}

if err = restartSystemdUnit(unit); err != nil {
//
// TODO(klihub): Kludge warning...
// If the runtime is CRI-O, it looks like we need to cut it some
// slack, after we've been started up by it but before we restart
// it. Otherwise it always reports our exit status as -1 (255).
// We are an init-container so a non-zero exit status would prevent
// other containers in our pod from ever starting...
//

if unit == crioUnit {
time.Sleep(3 * time.Second)
}

if err = restartSystemdUnit(conn, unit); err != nil {
log.Fatalf("failed to restart %q unit: %v", unit, err)
}

log.Println("enabled NRI for", unit)
}

func enableNriForContainerd() error {
log.Infof("enabling NRI in containerd configuration...")
tomlMap, err := readConfig(containerdConfigFile)
if err != nil {
return fmt.Errorf("error reading TOML file: %w", err)
Expand All @@ -80,6 +100,7 @@ func enableNriForContainerd() error {
}

func enableNriForCrio() error {
log.Infof("enabling NRI in CRI-O configuration...")
f, err := os.Create(crioConfigFile)
if err != nil {
return fmt.Errorf("error creating a drop-in file for CRI-O: %w", err)
Expand Down Expand Up @@ -147,40 +168,42 @@ func updateContainerdConfig(config map[string]interface{}) map[string]interface{
return config
}

func detectRuntime() (string, error) {
func detectRuntime() (string, *dbus.Conn, error) {
log.Infof("setting up D-Bus connection...")
conn, err := dbus.NewSystemConnectionContext(context.Background())
if err != nil {
return "", fmt.Errorf("failed to create DBus connection: %w", err)
return "", nil, fmt.Errorf("failed to create DBus connection: %w", err)
}
defer conn.Close()

// List the active container runtime (CRI-O or containerd) systemd units on the node.
// It is expected that only one container runtime systemd unit is active at a time
// (either containerd or CRI-O). If more than one container runtime systemd unit is found
// to be in an active state, the process fails.
log.Infof("looking for active runtime units on D-Bus...")
units, err := conn.ListUnitsByPatternsContext(context.Background(), []string{"active"}, []string{containerdUnit, crioUnit})
if err != nil {
return "", fmt.Errorf("failed to detect container runtime in use: %w", err)
return "", nil, fmt.Errorf("failed to detect container runtime in use: %w", err)
}

if len(units) == 0 {
return "", nil, fmt.Errorf("failed to detect container runtime in use: got 0 systemd units")
}

if len(units) > 1 {
return "", fmt.Errorf("detected more than one container runtime on the host, expected one")
return "", nil, fmt.Errorf("detected more than one container runtime on the host, expected one")
}

return units[0].Name, nil
}
log.Infof("found %s...", units[0].Name)

func restartSystemdUnit(unit string) error {
conn, err := dbus.NewSystemConnectionContext(context.Background())
if err != nil {
return fmt.Errorf("failed to create DBus connection: %w", err)
}
defer conn.Close()
return units[0].Name, conn, nil
}

func restartSystemdUnit(conn *dbus.Conn, unit string) error {
resC := make(chan string)
defer close(resC)

_, err = conn.RestartUnitContext(context.Background(), unit, replaceMode, resC)
log.Infof("restarting D-Bus unit %s...", unit)
_, err := conn.RestartUnitContext(context.Background(), unit, replaceMode, resC)
if err != nil {
return fmt.Errorf("failed to restart systemd unit %q: %w", unit, err)
}
Expand Down

0 comments on commit 8832f81

Please sign in to comment.