Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incorrect smartctl exit status #57

Open
fernandolcx opened this issue Jan 20, 2025 · 0 comments
Open

Incorrect smartctl exit status #57

fernandolcx opened this issue Jan 20, 2025 · 0 comments

Comments

@fernandolcx
Copy link

The problem is identical to the one reported for the official Go exporter here: prometheus-community/smartctl_exporter#265

I've intentionally plugged some very bad SAS hard drives into a 24-bay enclosure.
I noticed a discrepancy regarding smartprom_smartctl_exit_status / smartprom_exit_code, which is an important indicator of drive failure:

# smartctl -a -T permissive /dev/sdc -d scsi; echo $?
4
# smartctl -a -T permissive /dev/sdd -d scsi; echo $?
0
# smartctl -a -T permissive /dev/sde -d scsi; echo $?
0
# smartctl -a -T permissive /dev/sdk -d scsi; echo $?
128
# smartctl -a -T permissive /dev/sdp -d scsi; echo $?
8
# smartctl -a -T permissive /dev/sdq -d scsi; echo $?
8

However, the exporter reports:

smartprom_smartctl_exit_status{drive="/dev/sdc", ... } 0.0
smartprom_smartctl_exit_status{drive="/dev/sde", ... } 0.0
smartprom_smartctl_exit_status{drive="/dev/sdk", ... } 0.0
smartprom_smartctl_exit_status{drive="/dev/sdp", ... } 8.0
smartprom_smartctl_exit_status{drive="/dev/sdq", ... } 8.0

JSON data for /dev/sdc (drive defect: takes around 60s to respond)
{
  "json_format_version": [
    1,
    0
  ],
  "smartctl": {
    "version": [
      7,
      3
    ],
    "svn_revision": "5338",
    "platform_info": "x86_64-linux-6.8.12-4-pve",
    "build_info": "(local build)",
    "argv": [
      "smartctl",
      "-a",
      "-T",
      "permissive",
      "-d",
      "scsi",
      "/dev/sdc",
      "--json"
    ],
    "exit_status": 4
  },
  "local_time": {
    "time_t": 1737324399,
    "asctime": "Sun Jan 19 19:06:39 2025 -03"
  },
  "device": {
    "name": "/dev/sdc",
    "info_name": "/dev/sdc",
    "type": "scsi",
    "protocol": "SCSI"
  },
  "scsi_vendor": "SEAGATE",
  "scsi_product": "ST3600057SS",
  "scsi_model_name": "SEAGATE ST3600057SS",
  "scsi_revision": "EN03",
  "scsi_version": "SPC-3",
  "rotation_rate": 15000,
  "form_factor": {
    "scsi_value": 2,
    "name": "3.5 inches"
  },
  "logical_unit_id": "0x5000c50053c26987",
  "serial_number": "6SL458D5",
  "device_type": {
    "scsi_terminology": "Peripheral Device Type [PDT]",
    "scsi_value": 0,
    "name": "disk"
  },
  "scsi_transport_protocol": {
    "name": "SAS (SPL-4)",
    "value": 6
  },
  "smart_support": {
    "available": false
  },
  "temperature": {
    "current": 0
  },
  "scsi_temperature": {
    "drive_trip": 0
  }
}
JSON data for /dev/sdk (drive defect: several bad blocks, DST failing)
{
  "json_format_version": [
    1,
    0
  ],
  "smartctl": {
    "version": [
      7,
      3
    ],
    "svn_revision": "5338",
    "platform_info": "x86_64-linux-6.8.12-4-pve",
    "build_info": "(local build)",
    "argv": [
      "smartctl",
      "-a",
      "-T",
      "permissive",
      "-d",
      "scsi",
      "/dev/sdk",
      "--json"
    ],
    "exit_status": 128
  },
  "local_time": {
    "time_t": 1737324706,
    "asctime": "Sun Jan 19 19:11:46 2025 -03"
  },
  "device": {
    "name": "/dev/sdk",
    "info_name": "/dev/sdk",
    "type": "scsi",
    "protocol": "SCSI"
  },
  "scsi_vendor": "IBM-SSG",
  "scsi_product": "S7AQ3P0",
  "scsi_model_name": "IBM-SSG S7AQ3P0",
  "scsi_revision": "A058",
  "scsi_version": "SPC-4",
  "user_capacity": {
    "blocks": 5860533168,
    "bytes": 3000592982016
  },
  "logical_block_size": 512,
  "rotation_rate": 7200,
  "form_factor": {
    "scsi_value": 2,
    "name": "3.5 inches"
  },
  "logical_unit_id": "0x5000c500349f40f7",
  "serial_number": "Z290Q54400009145WCJ4",
  "device_type": {
    "scsi_terminology": "Peripheral Device Type [PDT]",
    "scsi_value": 0,
    "name": "disk"
  },
  "scsi_transport_protocol": {
    "name": "SAS (SPL-4)",
    "value": 6
  },
  "smart_support": {
    "available": true,
    "enabled": true
  },
  "temperature_warning": {
    "enabled": true
  },
  "smart_status": {
    "passed": true
  },
  "temperature": {
    "current": 39,
    "drive_trip": 65
  },
  "power_on_time": {
    "hours": 51871,
    "minutes": 57
  },
  "scsi_grown_defect_list": 1239,
  "scsi_error_counter_log": {
    "read": {
      "errors_corrected_by_eccfast": 2545895999,
      "errors_corrected_by_eccdelayed": 0,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 2545895999,
      "correction_algorithm_invocations": 1,
      "gigabytes_processed": "529664.798",
      "total_uncorrected_errors": 1
    },
    "write": {
      "errors_corrected_by_eccfast": 0,
      "errors_corrected_by_eccdelayed": 0,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 0,
      "correction_algorithm_invocations": 0,
      "gigabytes_processed": "104245.228",
      "total_uncorrected_errors": 0
    },
    "verify": {
      "errors_corrected_by_eccfast": 836386951,
      "errors_corrected_by_eccdelayed": 1,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 836386952,
      "correction_algorithm_invocations": 1,
      "gigabytes_processed": "4497341.609",
      "total_uncorrected_errors": 0
    }
  },
  "scsi_self_test_0": {
    "code": {
      "value": 2,
      "string": "Background long"
    },
    "result": {
      "value": 7,
      "string": "Failed in segment"
    },
    "power_on_time": {
      "hours": 51543,
      "aka": "accumulated_power_on_hours"
    },
    "lba_first_failure": {
      "value": 5165202415,
      "aka": "address_of_first_failure"
    },
    "sense_key": {
      "value": 3,
      "string": "Medium Error"
    },
    "asc": 17,
    "ascq": 0,
    "vendor_specific": 129
  },
  "scsi_self_test_1": {
    "code": {
      "value": 2,
      "string": "Background long"
    },
    "result": {
      "value": 7,
      "string": "Failed in segment"
    },
    "power_on_time": {
      "hours": 51288,
      "aka": "accumulated_power_on_hours"
    },
    "lba_first_failure": {
      "value": 5165202415,
      "aka": "address_of_first_failure"
    },
    "sense_key": {
      "value": 3,
      "string": "Medium Error"
    },
    "asc": 17,
    "ascq": 0,
    "vendor_specific": 129
  },
  "scsi_self_test_2": {
    "code": {
      "value": 2,
      "string": "Background long"
    },
    "result": {
      "value": 2,
      "string": "Aborted (device reset ?)"
    },
    "power_on_time": {
      "hours": 50516,
      "aka": "accumulated_power_on_hours"
    }
  },
  "scsi_extended_self_test_seconds": 27600
}
JSON data for /dev/sdp (drive defect: unknown; possibly depletion of the available spare sectors)
{
  "json_format_version": [
    1,
    0
  ],
  "smartctl": {
    "version": [
      7,
      3
    ],
    "svn_revision": "5338",
    "platform_info": "x86_64-linux-6.8.12-4-pve",
    "build_info": "(local build)",
    "argv": [
      "smartctl",
      "-a",
      "-T",
      "permissive",
      "-d",
      "scsi",
      "/dev/sdp",
      "--json"
    ],
    "exit_status": 8
  },
  "local_time": {
    "time_t": 1737324877,
    "asctime": "Sun Jan 19 19:14:37 2025 -03"
  },
  "device": {
    "name": "/dev/sdp",
    "info_name": "/dev/sdp",
    "type": "scsi",
    "protocol": "SCSI"
  },
  "scsi_vendor": "IBM-SSG",
  "scsi_product": "H0YH600",
  "scsi_model_name": "IBM-SSG H0YH600",
  "scsi_revision": "CF08",
  "scsi_version": "SPC-4",
  "user_capacity": {
    "blocks": 1172123568,
    "bytes": 600127266816
  },
  "logical_block_size": 512,
  "scsi_lb_provisioning": {
    "name": "fully provisioned",
    "value": 0,
    "management_enabled": {
      "name": "LBPME",
      "value": 0
    },
    "read_zeros": {
      "name": "LBPRZ",
      "value": 0
    }
  },
  "rotation_rate": 10000,
  "form_factor": {
    "scsi_value": 3,
    "name": "2.5 inches"
  },
  "logical_unit_id": "0x5000cca07d4e1980",
  "serial_number": "0BHBYHXH",
  "device_type": {
    "scsi_terminology": "Peripheral Device Type [PDT]",
    "scsi_value": 0,
    "name": "disk"
  },
  "scsi_transport_protocol": {
    "name": "SAS (SPL-4)",
    "value": 6
  },
  "smart_support": {
    "available": true,
    "enabled": true
  },
  "temperature_warning": {
    "enabled": true
  },
  "smart_status": {
    "passed": false,
    "scsi": {
      "asc": 93,
      "ascq": 100,
      "ie_string": "FIRMWARE IMPENDING FAILURE TOO MANY BLOCK REASSIGNS"
    }
  },
  "temperature": {
    "current": 32,
    "drive_trip": 65
  },
  "power_on_time": {
    "hours": 21975,
    "minutes": 22
  },
  "scsi_start_stop_cycle_counter": {
    "year_of_manufacture": "2018",
    "week_of_manufacture": "09",
    "specified_cycle_count_over_device_lifetime": 50000,
    "accumulated_start_stop_cycles": 88,
    "specified_load_unload_count_over_device_lifetime": 600000,
    "accumulated_load_unload_cycles": 998
  },
  "scsi_grown_defect_list": 0,
  "scsi_error_counter_log": {
    "read": {
      "errors_corrected_by_eccfast": 0,
      "errors_corrected_by_eccdelayed": 221387,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 221387,
      "correction_algorithm_invocations": 32499236,
      "gigabytes_processed": "5575.751",
      "total_uncorrected_errors": 40
    },
    "write": {
      "errors_corrected_by_eccfast": 0,
      "errors_corrected_by_eccdelayed": 0,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 0,
      "correction_algorithm_invocations": 87410,
      "gigabytes_processed": "19893.007",
      "total_uncorrected_errors": 0
    },
    "verify": {
      "errors_corrected_by_eccfast": 0,
      "errors_corrected_by_eccdelayed": 3795,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 3795,
      "correction_algorithm_invocations": 389822872,
      "gigabytes_processed": "115152.801",
      "total_uncorrected_errors": 0
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant