Skip to content

Commit

Permalink
probe: performance enhancements
Browse files Browse the repository at this point in the history
- Run out of SRAM, it goes quite a lot faster
- Cache the DAP clock frequency
- Read RDATA and write WDATA all in one go, and merge the turnaround
  into the ACK phase. Reduces the number of missed PIO SM cycles during an
  SWD transfer

Also fix a compiler warning and demote the loglevel of some debug messages.

Signed-off-by: Jonathan Bell <[email protected]>
  • Loading branch information
P33M committed Aug 18, 2022
1 parent 866f702 commit 47e2043
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 38 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,6 @@ target_compile_definitions (picoprobe PRIVATE

target_link_libraries(picoprobe PRIVATE pico_stdlib pico_unique_id tinyusb_device tinyusb_board hardware_pio)

pico_set_binary_type(picoprobe copy_to_ram)

pico_add_extra_outputs(picoprobe)
4 changes: 2 additions & 2 deletions src/picoprobe_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
// PIO config
#define PROBE_SM 0
#define PROBE_PIN_OFFSET 2
#define PROBE_PIN_SWCLK PROBE_PIN_OFFSET + 0 // 2
#define PROBE_PIN_SWDIO PROBE_PIN_OFFSET + 1 // 3
#define PROBE_PIN_SWCLK (PROBE_PIN_OFFSET + 0) // 2
#define PROBE_PIN_SWDIO (PROBE_PIN_OFFSET + 1) // 3

// Target reset config
#define PROBE_PIN_RESET 6
Expand Down
15 changes: 7 additions & 8 deletions src/probe.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void probe_assert_reset(bool state)
gpio_set_dir(PROBE_PIN_RESET, state);
}

void probe_write_bits(uint bit_count, uint8_t data_byte) {
void probe_write_bits(uint bit_count, uint32_t data_byte) {
DEBUG_PINS_SET(probe_timing, DBG_PIN_WRITE);
pio_sm_put_blocking(pio0, PROBE_SM, bit_count - 1);
pio_sm_put_blocking(pio0, PROBE_SM, data_byte);
Expand All @@ -117,14 +117,13 @@ void probe_write_bits(uint bit_count, uint8_t data_byte) {
DEBUG_PINS_CLR(probe_timing, DBG_PIN_WRITE);
}

uint8_t probe_read_bits(uint bit_count) {
uint32_t probe_read_bits(uint bit_count) {
DEBUG_PINS_SET(probe_timing, DBG_PIN_READ);
pio_sm_put_blocking(pio0, PROBE_SM, bit_count - 1);
uint32_t data = pio_sm_get_blocking(pio0, PROBE_SM);
uint8_t data_shifted = data >> 24;

if (bit_count < 8) {
data_shifted = data_shifted >> 8-bit_count;
uint32_t data_shifted = data;
if (bit_count < 32) {
data_shifted = data >> (32 - bit_count);
}

picoprobe_dump("Read %d bits 0x%x (shifted 0x%x)\n", bit_count, data, data_shifted);
Expand Down Expand Up @@ -207,7 +206,7 @@ void probe_handle_read(uint total_bits) {
} else {
chunk = bits;
}
probe.tx_buf[probe.tx_len] = probe_read_bits(chunk);
probe.tx_buf[probe.tx_len] = (uint8_t)probe_read_bits(chunk);
probe.tx_len++;
// Decrement remaining bits
bits -= chunk;
Expand All @@ -230,7 +229,7 @@ void probe_handle_write(uint8_t *data, uint total_bits) {
chunk = bits;
}

probe_write_bits(chunk, *data++);
probe_write_bits(chunk, (uint32_t)*data++);
bits -= chunk;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/probe.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
#define PROBE_H_

void probe_set_swclk_freq(uint freq_khz);
void probe_write_bits(uint bit_count, uint8_t data_byte);
uint8_t probe_read_bits(uint bit_count);
void probe_write_bits(uint bit_count, uint32_t data_byte);
uint32_t probe_read_bits(uint bit_count);

void probe_read_mode(void);
void probe_write_mode(void);
Expand Down
56 changes: 30 additions & 26 deletions src/sw_dp_pio.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
/* Slight hack - we're not bitbashing so we need to set baudrate off the DAP's delay cycles.
* Ideally we don't want calls to udiv everywhere... */
#define MAKE_KHZ(x) (CPU_CLOCK / (2000 * ((x) + 1)))
static uint32_t cached_delay;

// Generate SWJ Sequence
// count: sequence bit count
Expand All @@ -42,8 +43,11 @@ void SWJ_Sequence (uint32_t count, const uint8_t *data) {
uint32_t bits;
uint32_t n;

probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay));
picoprobe_info("SWJ sequence count = %d FDB=0x%2x\n", count, data[0]);
if (DAP_Data.clock_delay != cached_delay) {
probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay));
cached_delay = DAP_Data.clock_delay;
}
picoprobe_debug("SWJ sequence count = %d FDB=0x%2x\n", count, data[0]);
n = count;
while (n > 0) {
if (n > 8)
Expand All @@ -66,8 +70,11 @@ void SWD_Sequence (uint32_t info, const uint8_t *swdo, uint8_t *swdi) {
uint32_t bits;
uint32_t n;

probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay));
picoprobe_info("SWD sequence\n");
if (DAP_Data.clock_delay != cached_delay) {
probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay));
cached_delay = DAP_Data.clock_delay;
}
picoprobe_debug("SWD sequence\n");
n = info & SWD_SEQUENCE_CLK;
if (n == 0U) {
n = 64U;
Expand Down Expand Up @@ -108,8 +115,11 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) {
uint32_t parity = 0;
uint32_t n;

probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay));
picoprobe_info("SWD_transfer\n");
if (DAP_Data.clock_delay != cached_delay) {
probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay));
cached_delay = DAP_Data.clock_delay;
}
picoprobe_debug("SWD_transfer\n");
/* Generate the request packet */
prq |= (1 << 0); /* Start Bit */
for (n = 1; n < 5; n++) {
Expand All @@ -124,28 +134,25 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) {

/* Turnaround (ignore read bits) */
probe_read_mode();
probe_read_bits(DAP_Data.swd_conf.turnaround);

ack = probe_read_bits(3);
ack = probe_read_bits(DAP_Data.swd_conf.turnaround + 3);
ack >>= DAP_Data.swd_conf.turnaround;

if (ack == DAP_TRANSFER_OK) {
/* Data transfer phase */
if (request & DAP_TRANSFER_RnW) {
parity = 0;
/* Read RDATA[0:31] - note probe_read shifts to LSBs */
for (n = 0; n < 32; n += 8) {
bit = probe_read_bits(8);
parity += __builtin_popcount(bit);
val |= (bit & 0xff) << n;
}
val = probe_read_bits(32);
bit = probe_read_bits(1);
parity = __builtin_popcount(val);
if ((parity ^ bit) & 1U) {
/* Parity error */
ack = DAP_TRANSFER_ERROR;
}
if (data)
*data = val;

picoprobe_debug("Read %02x ack %02x 0x%08x parity %01x\n",
prq, ack, val, bit);
/* Turnaround for line idle */
probe_read_bits(DAP_Data.swd_conf.turnaround);
probe_write_mode();
Expand All @@ -156,14 +163,12 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) {

/* Write WDATA[0:31] */
val = *data;
parity = 0;
for (n = 0; n < 32; n += 8) {
bit = (val >> n) & 0xff;
probe_write_bits(8, bit);
parity += __builtin_popcount(bit);
}
probe_write_bits(32, val);
parity = __builtin_popcount(val);
/* Write Parity Bit */
probe_write_bits(1, parity & 0x1);
picoprobe_debug("write %02x ack %02x 0x%08x parity %01x\n",
prq, ack, val, parity);
}
/* Capture Timestamp */
if (request & DAP_TRANSFER_TIMESTAMP) {
Expand All @@ -173,9 +178,9 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) {
/* Idle cycles - drive 0 for N clocks */
if (DAP_Data.transfer.idle_cycles) {
for (n = DAP_Data.transfer.idle_cycles; n; ) {
if (n > 8) {
probe_write_bits(8, 0);
n -= 8;
if (n > 32) {
probe_write_bits(32, 0);
n -= 32;
} else {
probe_write_bits(n, 0);
n -= n;
Expand All @@ -194,8 +199,7 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) {
probe_write_mode();
if (DAP_Data.swd_conf.data_phase && ((request & DAP_TRANSFER_RnW) == 0U)) {
/* Dummy Write WDATA[0:31] + Parity */
for (n = 0; n < 32; n += 8)
probe_write_bits(8, 0);
probe_write_bits(32, 0);
probe_write_bits(1, 0);
}
return ((uint8_t)ack);
Expand Down

0 comments on commit 47e2043

Please sign in to comment.