I have been wondering for a while about the flash/SRAM concept that the GD32VF103 seems to use. Starting with the blinky test created by PlatformIO, I modified the code to test the throughput of the processor.
At least for code of around 1024 bytes there seem to be no penalties, which makes me think that maybe the code (I mean the whole code) is really loaded into SRAM. That could only be tested with a larger function. I saw in one of the threads about the GD32F103 that the flash is loaded into a cache, so you can see the load process when a new cache line has to be filled. I do not see this here, although admittedly the code is a bit simple. On an x86, similar code exposes the memory architecture, so I tried a similar approach here...
RiscV GD32VF103 Instruction throughput tests:
GPIOC 13 LED, test pin
CPU Clock 108 MHz (default setting). 9.26 ns/clock
Code
/*
 * Shortest toggle loop: one filler instruction after each store.
 *
 * Endlessly stores a4 to offset 20 and then to offset 16 from the base
 * address held in a5, so the test pin alternates between low and high.
 * The loop ends in an unconditional jump back to label 1, so the function
 * never returns. It is declared `naked`; the body is this asm statement only.
 */
static void __attribute__((noinline, naked)) main_loop_15MHz( void )
{
asm volatile(
" lui a5, 0x40011 \n\t" // a5 = 0x40011000, the GPIOC base address
" addi a3, a5, 16 \n\t" // a3 = a5 + 16; a3 is never read in this function
" lui a4, 0x2 \n\t" // a4 = 0x2000, i.e. bit 13 (the test pin)
"1: \n\t" // loop entry, target of the jump below
" sw a4, 20(a5) \n\t" // store to offset 20: start of the low phase
" addi a3, a5, 16 \n\t" // filler instruction, result unused
" sw a4, 16(a5) \n\t" // store to offset 16: start of the high phase
" addi a3, a5, 16 \n\t" // filler instruction, result unused
" j 1b \n\t" // back to label 1; its cost is part of the high phase
);
}
Tek0000.csv
Signal frequency 15.49 MHz
High width 47.20 ns - 2 opcodes + jump
Low width 17.68 ns - 2 opcodes
trying to match the jump width
------------------------------
30 ns ~ 3 clocks at 108 MHz
/*
 * Toggle loop with a padded low phase.
 *
 * Same store/jump sequence as main_loop_15MHz, but with four filler
 * instructions after the low store instead of one. The three extra
 * fillers are meant to make the low phase as long as the high phase,
 * which additionally contains the jump. The function never returns and
 * is declared `naked`; the body is this asm statement only.
 */
static void __attribute__((noinline, naked)) main_loop_10MHz( void )
{
asm volatile(
" lui a5, 0x40011 \n\t" // a5 = 0x40011000, the GPIOC base address
" addi a3, a5, 16 \n\t" // a3 = a5 + 16; a3 is never read in this function
" lui a4, 0x2 \n\t" // a4 = 0x2000, i.e. bit 13 (the test pin)
"1: \n\t" // loop entry, target of the jump below
" sw a4, 20(a5) \n\t" // store to offset 20: start of the low phase
// four fillers: one as in the short loop plus three to balance the jump
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" sw a4, 16(a5) \n\t" // store to offset 16: start of the high phase
" addi a3, a5, 16 \n\t" // single filler in the high phase
" j 1b \n\t" // back to label 1; its cost is part of the high phase
);
}
Match !
Tek0001.csv
Signal frequency 10.89 MHz
High width 46.9 ns - 2 opcodes + jump
Low width 45.8 ns - 5 opcodes
The low pulse is consistently about 1 ns shorter than the high pulse.
The addi opcodes are 32-bit opcodes; it seems that the width of an
opcode makes no difference to its execution time. Reusing the same
target register does not seem to impose any penalty either.
Check for cache line size, go for > 64 byte code
-------------------------------------------------
/*
 * Toggle loop stretched with filler instructions to look for a cache-line
 * refill.
 *
 * Twelve fillers follow the low store and nine follow the high store; the
 * high phase also contains the jump back to label 1. The function never
 * returns and is declared `naked`; the body is this asm statement only.
 */
static void __attribute__((noinline, naked)) main_loop( void )
{
asm volatile(
" lui a5, 0x40011 \n\t" // a5 = 0x40011000, the GPIOC base address
" addi a3, a5, 16 \n\t" // a3 = a5 + 16; a3 is never read in this function
" lui a4, 0x2 \n\t" // a4 = 0x2000, i.e. bit 13 (the test pin)
"1: \n\t" // loop entry, target of the jump below
" sw a4, 20(a5) \n\t" // low: store to offset 20 starts the low phase
// 12 filler instructions, results unused
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" sw a4, 16(a5) \n\t" // high: store to offset 16 starts the high phase
// 9 filler instructions, results unused
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" addi a3, a5, 16 \n\t"
" j 1b \n\t" // back to label 1; its cost is part of the high phase
);
}
Tek0002.csv
Signal frequency 4.156 MHz
High width 121.2 ns - 11 opcodes + jump (+2) // 9.32 ns/opcode
Low width 120.7 ns - 13 opcodes = ~ 9.28 ns/opcode
No cache line fill detected
Try longer code > 128 bytes
---------------------------
/*
 * Toggle loop for the "> 128 bytes" measurement (abbreviated listing).
 *
 * NOTE: the two filler lines below are shorthand. Per the author's
 * annotations they stand for 44 and 42 consecutive copies of the same
 * instruction; as literally written, the asm contains one of each.
 * The function never returns and is declared `naked`; the body is this
 * asm statement only.
 */
static void __attribute__((noinline, naked)) main_loop( void )
{
asm volatile(
" lui a5, 0x40011 \n\t" // a5 = 0x40011000, the GPIOC base address
" addi a3, a5, 16 \n\t" // a3 = a5 + 16; a3 is never read in this function
" lui a4, 0x2 \n\t" // a4 = 0x2000, i.e. bit 13 (the test pin)
"1: \n\t" // loop entry, target of the jump below
" sw a4, 20(a5) \n\t" // low: store to offset 20 starts the low phase
" addi a3, a5, 16 \n\t" // filler; repeat this line 44 times to reproduce the measurement
" sw a4, 16(a5) \n\t" // high: store to offset 16 starts the high phase
" addi a3, a5, 16 \n\t" // filler; repeat this line 42 times to reproduce the measurement
" j 1b \n\t" // back to label 1; its cost is part of the high phase
);
}
Tek0003.csv
44e - 2f6 = 344 bytes
Signal frequency 1.200 MHz
High width 417.6 ns - 43 opcodes + jump (+2) // 9.28 ns/opcode
Low width 415.8 ns - 45 opcodes = ~ 9.24 ns/opcode
Try longer code > 1024 bytes
---------------------------
/*
 * Toggle loop for the "> 1024 bytes" measurement (abbreviated listing).
 *
 * NOTE: the two filler lines below are shorthand. Per the author's
 * annotations they stand for 172 and 170 consecutive copies of the same
 * instruction; as literally written, the asm contains one of each.
 * The function never returns and is declared `naked`; the body is this
 * asm statement only.
 */
static void __attribute__((noinline, naked)) main_loop( void )
{
asm volatile(
" lui a5, 0x40011 \n\t" // a5 = 0x40011000, the GPIOC base address
" addi a3, a5, 16 \n\t" // a3 = a5 + 16; a3 is never read in this function
" lui a4, 0x2 \n\t" // a4 = 0x2000, i.e. bit 13 (the test pin)
"1: \n\t" // loop entry, target of the jump below
" sw a4, 20(a5) \n\t" // low: store to offset 20 starts the low phase
" addi a3, a5, 16 \n\t" // filler; repeat this line 172 times to reproduce the measurement
" sw a4, 16(a5) \n\t" // high: store to offset 16 starts the high phase
" addi a3, a5, 16 \n\t" // filler; repeat this line 170 times to reproduce the measurement
" j 1b \n\t" // back to label 1; its cost is part of the high phase
);
}
Tek0004.csv
84e - 2f6 = 1368 bytes
Signal frequency 312.2 kHz
High width 1602 ns - 171 opcodes + jump (+2) // 9.28 ns/opcode
Low width 1601 ns - 173 opcodes = ~ 9.25 ns/opcode
The Blinky code is then:
/*!
\file main.c
\brief running led
\version 2019-6-5, V1.0.0, firmware for GD32VF103
*/
/*
Copyright (c) 2019, GigaDevice Semiconductor Inc.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.
*/
#include "gd32vf103.h"
#include "systick.h"
#include <stdio.h>
/* BUILTIN LED OF LONGAN BOARDS IS PIN PC13 */
#define LED_PIN GPIO_PIN_13
#define LED_GPIO_PORT GPIOC
#define LED_GPIO_CLK RCU_GPIOC
static void main_loop( void );
/*!
    \brief      configure PA1, PA2 and the LED pin (PC13) as push-pull outputs
                and write each pin's mask to its port's bit-clear register
    \param[in]  none
    \param[out] none
    \retval     none
*/
void longan_led_init(void)
{
    /* enable the GPIOA peripheral clock (needed for PA1 and PA2) */
    rcu_periph_clock_enable(RCU_GPIOA);

    /* PA1: push-pull output, then clear the output bit */
    gpio_init(GPIOA, GPIO_MODE_OUT_PP, GPIO_OSPEED_50MHZ, GPIO_PIN_1);
    GPIO_BC(GPIOA) = GPIO_PIN_1;

    /* PA2: push-pull output, then clear the output bit */
    gpio_init(GPIOA, GPIO_MODE_OUT_PP, GPIO_OSPEED_50MHZ, GPIO_PIN_2);
    GPIO_BC(GPIOA) = GPIO_PIN_2;

    /* enable the peripheral clock of the LED port */
    rcu_periph_clock_enable(LED_GPIO_CLK);

    /* LED pin: push-pull output, then clear the output bit */
    gpio_init(LED_GPIO_PORT, GPIO_MODE_OUT_PP, GPIO_OSPEED_50MHZ, LED_PIN);
    GPIO_BC(LED_GPIO_PORT) = LED_PIN;
}
/*!
    \brief      write the LED pin mask to the bit-operate (set) register of the LED port
    \param[in]  none
    \param[out] none
    \retval     none
*/
void longan_led_on(void)
{
    /* NOTE(review): this drives the pin high; whether that lights the LED
       depends on how the LED is wired on the board - confirm */
    GPIO_BOP(LED_GPIO_PORT) = LED_PIN;
}
/*!
    \brief      write the LED pin mask to the bit-clear register of the LED port
    \param[in]  none
    \param[out] none
    \retval     none
*/
void longan_led_off(void)
{
    /* NOTE(review): this drives the pin low; whether that turns the LED off
       depends on how the LED is wired on the board - confirm */
    GPIO_BC(LED_GPIO_PORT) = LED_PIN;
}
/*!
    \brief      program entry: configure the output pins, then run the benchmark loop
    \param[in]  none
    \param[out] none
    \retval     0, reached only if main_loop() returns
*/
int main(void)
{
    /* the pins must be outputs before the loop starts toggling PC13 */
    longan_led_init();

    /* whichever main_loop variant was pasted below; each one shown ends in
       an unconditional jump back to its own start */
    main_loop();

    return 0;
}
// and here paste one of the main_loop functions
[code]