Author Topic: FNIRSI-1013D "100MHz" tablet oscilloscope  (Read 418912 times)

Tehnik, Smoczy, tokar and 5 Guests are viewing this topic.

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #925 on: July 31, 2021, 05:39:01 pm »
I'm looking into the menu slide performance issue and it looks like it is mainly compiler optimization.

This is from the original code. The assembler for sliding the main menu onto the screen.
Code: [Select]
                             **************************************************************
                             *                          FUNCTION                          *
                             **************************************************************
                             void __stdcall display_slide_down_main_menu(void)
             void              <VOID>         <RETURN>
             undefined4        Stack[-0x14]:4 local_14                                XREF[1]:     8000e7dc(*) 
             undefined4        Stack[-0x28]:4 local_28                                XREF[1]:     8000e7d8(*) 
                             display_slide_down_main_menu                    XREF[1]:     setup_main_menu:8002916c(c) 
        8000e6f8 00 5e 2d e9     stmdb      sp!,{r9 r10 r11 r12 lr}
        8000e6fc f0 01 2d e9     stmdb      sp!,{r4 r5 r6 r7 r8}
        8000e700 dc 00 9f e5     ldr        r0,[DAT_8000e7e4]                                = 001B1B1Bh
        8000e704 ea 52 00 eb     bl         convert_color                                    ushort convert_color(uint color)
        8000e708 00 40 a0 e1     mov        r4,r0
        8000e70c 00 00 a0 e3     mov        r0,#0x0
        8000e710 e7 52 00 eb     bl         convert_color                                    ushort convert_color(uint color)
        8000e714 cc 60 9f e5     ldr        r6,[PTR_DAT_8000e7e8]                            = 80192ef8
        8000e718 00 50 a0 e1     mov        r5,r0
        8000e71c 02 20 a0 e3     mov        r2,#0x2
        8000e720 2e 00 a0 e3     mov        r0,#0x2e
        8000e724 b0 20 c6 e1     strh       r2,[r6,#0x0]=>DAT_80192ef8                       = 0154h
        8000e728 97 10 a0 e3     mov        r1,#0x97
        8000e72c b2 00 c6 e1     strh       r0,[r6,#0x2]=>DAT_80192efa                       = 0914h
        8000e730 b4 10 c6 e1     strh       r1,[r6,#0x4]=>DAT_80192efc                       = 6828h
        8000e734 b6 00 c6 e1     strh       r0,[r6,#0x6]=>DAT_80192efe                       = 1409h
        8000e738 0e 24 00 eb     bl         set_screen_to_global_pointer                     void set_screen_to_global_pointe
        8000e73c a8 a0 9f e5     ldr        r10,[->BITMAP_MAIN_MENU]                         = 800ac420
        8000e740 a8 b0 9f e5     ldr        r11,[Global_Frame_Buffer_Pointer]                = 8019CF60h
        8000e744 eb 00 a0 e3     mov        r0,#0xeb
                             LAB_8000e748                                    XREF[1]:     8000e7d4(j) 
        8000e748 eb 00 50 e3     cmp        r0,#0xeb
        8000e74c 00 30 a0 e1     mov        r3,r0
        8000e750 19 00 00 2a     bcs        LAB_8000e7bc
                             LAB_8000e754                                    XREF[1]:     8000e7b8(j) 
        8000e754 b2 c0 d6 e1     ldrh       r12,[r6,#0x2]=>DAT_80192efa                      = 0914h
        8000e758 00 20 43 e0     sub        r2,r3,r0
        8000e75c b0 70 d6 e1     ldrh       r7,[r6,#0x0]=>DAT_80192ef8                       = 0154h
        8000e760 02 c0 8c e0     add        r12,r12,r2
        8000e764 95 10 a0 e3     mov        r1,#0x95
        8000e768 93 01 01 e0     mul        r1,r3,r1
        8000e76c 8c 91 8c e0     add        r9,r12,r12, lsl #0x3
        8000e770 00 80 9b e5     ldr        r8,[r11,#0x0]=>DAT_8019cf60
        8000e774 0c c2 89 e0     add        r12,r9,r12, lsl #0x4
        8000e778 8c c2 87 e0     add        r12,r7,r12, lsl #0x5
        8000e77c 95 20 a0 e3     mov        r2,#0x95
        8000e780 81 10 8a e0     add        r1,r10,r1, lsl #0x1
        8000e784 8c 70 88 e0     add        r7,r8,r12, lsl #0x1
                             LAB_8000e788                                    XREF[1]:     8000e7a8(j) 
        8000e788 b2 c0 d1 e0     ldrh       r12,[r1],#0x2=>TEXT_BITMAP_CALIB_START
        8000e78c 00 00 5c e3     cmp        r12,#0x0
        8000e790 04 c0 a0 01     moveq      r12,r4
        8000e794 01 00 00 0a     beq        LAB_8000e7a0
        8000e798 3e 0b 5c e3     cmp        r12,#0xf800
        8000e79c 05 c0 a0 01     moveq      r12,r5
                             LAB_8000e7a0                                    XREF[1]:     8000e794(j) 
        8000e7a0 01 20 52 e2     subs       r2,r2,#0x1
        8000e7a4 b2 c0 c7 e0     strh       r12,[r7],#0x2
        8000e7a8 f6 ff ff 1a     bne        LAB_8000e788
        8000e7ac 01 10 83 e2     add        r1,r3,#0x1
        8000e7b0 01 38 c1 e3     bic        r3,r1,#0x10000
        8000e7b4 eb 00 53 e3     cmp        r3,#0xeb
        8000e7b8 e5 ff ff 3a     bcc        LAB_8000e754
                             LAB_8000e7bc                                    XREF[1]:     8000e750(j) 
        8000e7bc 30 10 9f e5     ldr        r1,[DAT_8000e7f4]                                = 0000E38Fh
        8000e7c0 90 01 01 e0     mul        r1,r0,r1
        8000e7c4 21 0a 40 e0     sub        r0,r0,r1, lsr #0x14
        8000e7c8 01 00 40 e2     sub        r0,r0,#0x1
        8000e7cc 00 08 a0 e1     mov        r0,r0, lsl #0x10
        8000e7d0 20 08 b0 e1     movs       r0,r0, lsr #0x10
        8000e7d4 db ff ff 1a     bne        LAB_8000e748
        8000e7d8 f0 01 bd e8     ldmia      sp!,{r4 r5 r6 r7 r8}=>local_28
        8000e7dc 00 5e bd e8     ldmia      sp!,{r9 r10 r11 r12 lr}=>local_14
        8000e7e0 de 23 00 ea     b          set_frame_to_global_pointer


This is what Ghidra shows for the code I compile
Code: [Select]
                             **************************************************************
                             *                          FUNCTION                          *
                             **************************************************************
                             void __stdcall display_slide_top_rect_onto_screen(ushort
             void              <VOID>         <RETURN>
             ushort            r0:2           xpos
             ushort            r1:2           ypos
             ushort            r2:2           width
             ushort            r3:2           height
             uint              Stack[0x0]:4   speed                                   XREF[2]:     80001a94(R),
                                                                                                   80001be0(R) 
             undefined4        Stack[-0x4]:4  local_4                                 XREF[2]:     80001a54(W),
                                                                                                   80001c28(*) 
             undefined4        Stack[-0xc]:4  local_c                                 XREF[4]:     80001b2c(W),
                                                                                                   80001b60(R),
                                                                                                   80001b98(R),
                                                                                                   80001ba0(W) 
             undefined4        Stack[-0x10]:4 local_10                                XREF[4]:     80001b14(W),
                                                                                                   80001b50(R),
                                                                                                   80001bb0(R),
                                                                                                   80001bb8(W) 
             undefined2        Stack[-0x12]:2 local_12                                XREF[7]:     80001ac4(W),
                                                                                                   80001af0(R),
                                                                                                   80001b30(R),
                                                                                                   80001bd8(R),
                                                                                                   80001bdc(R),
                                                                                                   80001c0c(W),
                                                                                                   80001c10(R) 
             undefined2        Stack[-0x14]:2 local_14                                XREF[4]:     80001b34(W),
                                                                                                   80001bbc(R),
                                                                                                   80001bc4(W),
                                                                                                   80001bc8(R) 
             undefined2        Stack[-0x16]:2 local_16                                XREF[6]:     80001b40(W),
                                                                                                   80001b48(R),
                                                                                                   80001b58(R),
                                                                                                   80001b70(R),
                                                                                                   80001b78(W),
                                                                                                   80001b7c(R) 
             undefined4        Stack[-0x1c]:4 local_1c                                XREF[3]:     80001ae0(W),
                                                                                                   80001b04(R),
                                                                                                   80001b20(R) 
             undefined2        Stack[-0x22]:2 local_22                                XREF[2]:     80001a74(W),
                                                                                                   80001ac8(R) 
             undefined2        Stack[-0x24]:2 local_24                                XREF[2]:     80001a7c(W),
                                                                                                   80001acc(R) 
             undefined2        Stack[-0x26]:2 local_26                                XREF[2]:     80001a84(W),
                                                                                                   80001b80(R) 
             undefined2        Stack[-0x28]:2 local_28                                XREF[4]:     80001a8c(W),
                                                                                                   80001a90(R),
                                                                                                   80001aa8(R),
                                                                                                   80001bcc(R) 
                             display_slide_top_rect_onto_screen              XREF[4]:     FUN_80005ae0:80005bac(c),
                                                                                          FUN_80005f40:8000608c(c),
                                                                                          FUN_8000667c:800067c8(c),
                                                                                          FUN_80006de8:80006f00(c) 
        80001a54 04 b0 2d e5     str        r11,[sp,#local_4]!
        80001a58 00 b0 8d e2     add        r11,sp,#0x0
        80001a5c 24 d0 4d e2     sub        sp,sp,#0x24
        80001a60 00 c0 a0 e1     mov        r12,xpos
        80001a64 01 00 a0 e1     mov        xpos,ypos
        80001a68 02 10 a0 e1     mov        ypos,width
        80001a6c 03 20 a0 e1     mov        width,height
        80001a70 0c 30 a0 e1     mov        height,r12
        80001a74 be 31 4b e1     strh       height,[r11,#local_22]
        80001a78 00 30 a0 e1     mov        height,xpos
        80001a7c b0 32 4b e1     strh       height,[r11,#local_24]
        80001a80 01 30 a0 e1     mov        height,ypos
        80001a84 b2 32 4b e1     strh       height,[r11,#local_26]
        80001a88 02 30 a0 e1     mov        height,width
        80001a8c b4 32 4b e1     strh       height,[r11,#local_28]
        80001a90 b4 32 5b e1     ldrh       height,[r11,#local_28]
        80001a94 04 20 9b e5     ldr        width,[r11,#speed]
        80001a98 92 03 03 e0     mul        height,width,height
        80001a9c 23 3a a0 e1     mov        height,height, lsr #0x14
        80001aa0 03 38 a0 e1     mov        height,height, lsl #0x10
        80001aa4 23 38 a0 e1     mov        height,height, lsr #0x10
        80001aa8 b4 22 5b e1     ldrh       width,[r11,#local_28]
        80001aac 03 30 42 e0     sub        height,width,height
        80001ab0 03 38 a0 e1     mov        height,height, lsl #0x10
        80001ab4 23 38 a0 e1     mov        height,height, lsr #0x10
        80001ab8 01 30 43 e2     sub        height,height,#0x1
        80001abc 03 38 a0 e1     mov        height,height, lsl #0x10
        80001ac0 23 38 a0 e1     mov        height,height, lsr #0x10
        80001ac4 be 30 4b e1     strh       height,[r11,#local_12]
        80001ac8 be 21 5b e1     ldrh       width,[r11,#local_22]
        80001acc b0 32 5b e1     ldrh       height,[r11,#local_24]
        80001ad0 58 11 9f e5     ldr        ypos,[DAT_80001c30]                              = 80012208h
        80001ad4 b0 12 d1 e1     ldrh       ypos,[ypos,#0x20]=>DAT_80012228
        80001ad8 91 03 03 e0     mul        height,ypos,height
        80001adc 03 30 82 e0     add        height,width,height
        80001ae0 18 30 0b e5     str        height,[r11,#local_1c]
        80001ae4 49 00 00 ea     b          LAB_80001c10
                             LAB_80001ae8                                    XREF[1]:     80001c18(j) 
        80001ae8 40 31 9f e5     ldr        height,[DAT_80001c30]                            = 80012208h
        80001aec 0c 20 93 e5     ldr        width,[height,#0xc]=>DAT_80012214
        80001af0 fe 30 5b e1     ldrsh      height,[r11,#local_12]
        80001af4 34 11 9f e5     ldr        ypos,[DAT_80001c30]                              = 80012208h
        80001af8 b0 12 d1 e1     ldrh       ypos,[ypos,#0x20]=>DAT_80012228
        80001afc 91 03 03 e0     mul        height,ypos,height
        80001b00 03 10 a0 e1     mov        ypos,height
        80001b04 18 30 1b e5     ldr        height,[r11,#local_1c]
        80001b08 03 30 81 e0     add        height,ypos,height
        80001b0c 83 30 a0 e1     mov        height,height, lsl #0x1
        80001b10 03 30 82 e0     add        height,width,height
        80001b14 0c 30 0b e5     str        height,[r11,#local_10]
        80001b18 10 31 9f e5     ldr        height,[DAT_80001c30]                            = 80012208h
        80001b1c 08 20 93 e5     ldr        width,[height,#0x8]=>DAT_80012210
        80001b20 18 30 1b e5     ldr        height,[r11,#local_1c]
        80001b24 83 30 a0 e1     mov        height,height, lsl #0x1
        80001b28 03 30 82 e0     add        height,width,height
        80001b2c 08 30 0b e5     str        height,[r11,#local_c]
        80001b30 be 30 5b e1     ldrh       height,[r11,#local_12]
        80001b34 b0 31 4b e1     strh       height,[r11,#local_14]
        80001b38 22 00 00 ea     b          LAB_80001bc8
                             LAB_80001b3c                                    XREF[1]:     80001bd4(j) 
        80001b3c 00 30 a0 e3     mov        height,#0x0
        80001b40 b2 31 4b e1     strh       height,[r11,#local_16]
        80001b44 0c 00 00 ea     b          LAB_80001b7c
                             LAB_80001b48                                    XREF[1]:     80001b88(j) 
        80001b48 b2 31 5b e1     ldrh       height,[r11,#local_16]
        80001b4c 83 30 a0 e1     mov        height,height, lsl #0x1
        80001b50 0c 20 1b e5     ldr        width,[r11,#local_10]
        80001b54 03 20 82 e0     add        width,width,height
        80001b58 b2 31 5b e1     ldrh       height,[r11,#local_16]
        80001b5c 83 30 a0 e1     mov        height,height, lsl #0x1
        80001b60 08 10 1b e5     ldr        ypos,[r11,#local_c]
        80001b64 03 30 81 e0     add        height,ypos,height
        80001b68 b0 20 d2 e1     ldrh       width,[width,#0x0]
        80001b6c b0 20 c3 e1     strh       width,[height,#0x0]
        80001b70 b2 31 5b e1     ldrh       height,[r11,#local_16]
        80001b74 01 30 83 e2     add        height,height,#0x1
        80001b78 b2 31 4b e1     strh       height,[r11,#local_16]
                             LAB_80001b7c                                    XREF[1]:     80001b44(j) 
        80001b7c b2 21 5b e1     ldrh       width,[r11,#local_16]
        80001b80 b2 32 5b e1     ldrh       height,[r11,#local_26]
        80001b84 03 00 52 e1     cmp        width,height
        80001b88 ee ff ff 9a     bls        LAB_80001b48
        80001b8c 9c 30 9f e5     ldr        height,[DAT_80001c30]                            = 80012208h
        80001b90 b0 32 d3 e1     ldrh       height,[height,#0x20]=>DAT_80012228
        80001b94 83 30 a0 e1     mov        height,height, lsl #0x1
        80001b98 08 20 1b e5     ldr        width,[r11,#local_c]
        80001b9c 03 30 82 e0     add        height,width,height
        80001ba0 08 30 0b e5     str        height,[r11,#local_c]
        80001ba4 84 30 9f e5     ldr        height,[DAT_80001c30]                            = 80012208h
        80001ba8 b0 32 d3 e1     ldrh       height,[height,#0x20]=>DAT_80012228
        80001bac 83 30 a0 e1     mov        height,height, lsl #0x1
        80001bb0 0c 20 1b e5     ldr        width,[r11,#local_10]
        80001bb4 03 30 82 e0     add        height,width,height
        80001bb8 0c 30 0b e5     str        height,[r11,#local_10]
        80001bbc b0 31 5b e1     ldrh       height,[r11,#local_14]
        80001bc0 01 30 83 e2     add        height,height,#0x1
        80001bc4 b0 31 4b e1     strh       height,[r11,#local_14]
                             LAB_80001bc8                                    XREF[1]:     80001b38(j) 
        80001bc8 b0 21 5b e1     ldrh       width,[r11,#local_14]
        80001bcc b4 32 5b e1     ldrh       height,[r11,#local_28]
        80001bd0 03 00 52 e1     cmp        width,height
        80001bd4 d8 ff ff 9a     bls        LAB_80001b3c
        80001bd8 be 20 5b e1     ldrh       width,[r11,#local_12]
        80001bdc fe 30 5b e1     ldrsh      height,[r11,#local_12]
        80001be0 04 10 9b e5     ldr        ypos,[r11,#speed]
        80001be4 91 03 03 e0     mul        height,ypos,height
        80001be8 23 3a a0 e1     mov        height,height, lsr #0x14
        80001bec 03 38 a0 e1     mov        height,height, lsl #0x10
        80001bf0 23 38 a0 e1     mov        height,height, lsr #0x10
        80001bf4 03 30 42 e0     sub        height,width,height
        80001bf8 03 38 a0 e1     mov        height,height, lsl #0x10
        80001bfc 23 38 a0 e1     mov        height,height, lsr #0x10
        80001c00 01 30 43 e2     sub        height,height,#0x1
        80001c04 03 38 a0 e1     mov        height,height, lsl #0x10
        80001c08 23 38 a0 e1     mov        height,height, lsr #0x10
        80001c0c be 30 4b e1     strh       height,[r11,#local_12]
                             LAB_80001c10                                    XREF[1]:     80001ae4(j) 
        80001c10 fe 30 5b e1     ldrsh      height,[r11,#local_12]
        80001c14 00 00 53 e3     cmp        height,#0x0
        80001c18 b2 ff ff aa     bge        LAB_80001ae8
        80001c1c 00 00 a0 e1     mov        xpos,xpos
        80001c20 00 00 a0 e1     mov        xpos,xpos
        80001c24 00 d0 8b e2     add        sp,r11,#0x0
        80001c28 04 b0 9d e4     ldr        r11=>local_4,[sp],#0x4
        80001c2c 1e ff 2f e1     bx         lr


It is much longer for the loop portion. The psuedo C output of Ghidra is not much different for the both

This is my C code
Code: [Select]
void display_slide_top_rect_onto_screen(uint16 xpos, uint16 ypos, uint16 width, uint16 height, uint32 speed)
{
  uint16 *ptr1, *ptr2;
  int16   startline;     //Needs to be an int because it has to become negative to stop
  uint16  line;
  uint32  startxy;
  uint16  pixel;

  //Starting line of the rectangle to display first
  startline = height - ((height * speed) >> 20) - 1;
 
  //Start x,y offset for source and destination calculation
  startxy = xpos + (ypos * displaydata.pixelsperline);
 
  //Draw lines as long as is needed to get the whole rectangle on screen
  while(startline >= 0)
  {
    //Source pointer is based on the current line
    ptr2 = displaydata.sourcebuffer + startxy + (startline * displaydata.pixelsperline);
   
    //Destination pointer is always the first line
    ptr1 = displaydata.screenbuffer + startxy;
   
    //Handle the needed number of lines for this loop
    for(line=startline;line<=height;line++)
    {
      //Copy a single line to the screen buffer
      for(pixel=0;pixel<=width;pixel++)
      {
        //Copy one pixel at a time
        ptr1[pixel] = ptr2[pixel];
      }
     
      //Point to the next line of pixels in both destination and source
      ptr1 += displaydata.pixelsperline;
      ptr2 += displaydata.pixelsperline;
    }
   
    //Calculate the new starting line
    startline = startline - 1 - ((startline * speed) >> 20);
  }
}


This is what Ghidra makes of it
Code: [Select]
void display_slide_top_rect_onto_screen(ushort xpos,ushort ypos,ushort width,ushort height,uint speed)
{
  int iVar1;
  ushort local_16;
  ushort local_14;
  ushort local_12;
  int local_10;
  int local_c;
 
  local_12 = (ushort)((((uint)height - (speed * height >> 0x14) & 0xffff) - 1) * 0x10000 >> 0x10);
  iVar1 = (uint)xpos + (uint)*(ushort *)(DAT_80001c30 + 0x20) * (uint)ypos;

  while (-1 < (short)local_12)
  {
    local_10 = *(int *)(DAT_80001c30 + 0xc) + ((uint)*(ushort *)(DAT_80001c30 + 0x20) * (int)(short)local_12 + iVar1) * 2;
    local_c = *(int *)(DAT_80001c30 + 8) + iVar1 * 2;
    local_14 = local_12;

    while (local_14 <= height)
    {
      local_16 = 0;
      while (local_16 <= width)
      {
        *(undefined2 *)(local_c + (uint)local_16 * 2) = *(undefined2 *)(local_10 + (uint)local_16 * 2);
        local_16 = local_16 + 1;
      }

      local_c = local_c + (uint)*(ushort *)(DAT_80001c30 + 0x20) * 2;
      local_10 = local_10 + (uint)*(ushort *)(DAT_80001c30 + 0x20) * 2;
      local_14 = local_14 + 1;
    }

    local_12 = (ushort)((((uint)local_12 - (speed * (int)(short)local_12 >> 0x14) & 0xffff) - 1) * 0x10000 >> 0x10);
  }
}


I can probably speed things up a bit by changing the variables to full integers instead of shorts. Only the pixel pointers need to be unsigned shorts.

If that does not do the trick I will have to raise the compilers optimization level and see what comes of that.

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #926 on: July 31, 2021, 07:49:33 pm »
Managed to speed things up, but I'm not sure if it is the same as the original. Difficult to compare with only on scope :(

Told the compiler to keep variables in registers, which made a difference.
Code: [Select]
void display_slide_top_rect_onto_screen(uint32 xpos, uint32 ypos, uint32 width, uint32 height, uint32 speed)
{
  register uint16 *ptr1, *ptr2;
  register int32   startline;     //Needs to be an int because it has to become negative to stop
  register uint32  line;
  register uint32  startxy;
  register uint32  pixel;
  register uint32  pixels = displaydata.pixelsperline;

  //Starting line of the rectangle to display first
  startline = height - ((height * speed) >> 20) - 1;
 
  //Start x,y offset for source and destination calculation
  startxy = xpos + (ypos * displaydata.pixelsperline);
 
  //Draw lines as long as is needed to get the whole rectangle on screen
  while(startline >= 0)
  {
    //Source pointer is based on the current line
    ptr2 = displaydata.sourcebuffer + startxy + (startline * pixels);
   
    //Destination pointer is always the first line
    ptr1 = displaydata.screenbuffer + startxy;
   
    //Handle the needed number of lines for this loop
    for(line=startline;line<=height;line++)
    {
      //Copy a single line to the screen buffer
      for(pixel=0;pixel<=width;pixel++)
      {
        //Copy one pixel at a time
        ptr1[pixel] = ptr2[pixel];
      }
     
      //Point to the next line of pixels in both destination and source
      ptr1 += pixels;
      ptr2 += pixels;
    }
   
    //Calculate the new starting line
    startline = startline - 1 - ((startline * speed) >> 20);
  }
}


It is turned into this
Code: [Select]
                             **************************************************************
                             *                          FUNCTION                          *
                             **************************************************************
                             void __stdcall display_slide_top_rect_onto_screen(ushort
             void              <VOID>         <RETURN>
             ushort            r0:2           xpos
             ushort            r1:2           ypos
             ushort            r2:2           width
             ushort            r3:2           height
             uint              Stack[0x0]:4   speed                                   XREF[2]:     80001a80(R),
                                                                                                   80001b48(R) 
             undefined4        Stack[-0x20]:4 local_20                                XREF[1]:     80001b74(*) 
             undefined4        Stack[-0x24]:4 local_24                                XREF[2]:     80001a60(W),
                                                                                                   80001ab0(R) 
             undefined4        Stack[-0x28]:4 local_28                                XREF[2]:     80001a64(W),
                                                                                                   80001aa8(R) 
             undefined4        Stack[-0x2c]:4 local_2c                                XREF[2]:     80001a68(W),
                                                                                                   80001b14(R) 
             undefined4        Stack[-0x30]:4 local_30                                XREF[4]:     80001a6c(W),
                                                                                                   80001a7c(R),
                                                                                                   80001a8c(R),
                                                                                                   80001b34(R) 
                             display_slide_top_rect_onto_screen              XREF[4]:     FUN_80005a2c:80005af8(c),
                                                                                          FUN_80005e8c:80005fd8(c),
                                                                                          FUN_800065c8:80006714(c),
                                                                                          FUN_80006d34:80006e4c(c) 
        80001a54 f0 0f 2d e9     stmdb      sp!,{r4 r5 r6 r7 r8 r9 r10 r11}
        80001a58 1c b0 8d e2     add        r11,sp,#0x1c
        80001a5c 10 d0 4d e2     sub        sp,sp,#0x10
        80001a60 20 00 0b e5     str        xpos,[r11,#local_24]
        80001a64 24 10 0b e5     str        ypos,[r11,#local_28]
        80001a68 28 20 0b e5     str        width,[r11,#local_2c]
        80001a6c 2c 30 0b e5     str        height,[r11,#local_30]
        80001a70 04 31 9f e5     ldr        height,[PTR_DAT_80001b7c]                        = 80012154
        80001a74 b0 32 d3 e1     ldrh       height,[height,#0x20]=>DAT_80012174
        80001a78 03 70 a0 e1     mov        r7,height
        80001a7c 2c 30 1b e5     ldr        height,[r11,#local_30]
        80001a80 04 20 9b e5     ldr        width,[r11,#speed]
        80001a84 92 03 03 e0     mul        height,width,height
        80001a88 23 3a a0 e1     mov        height,height, lsr #0x14
        80001a8c 2c 20 1b e5     ldr        width,[r11,#local_30]
        80001a90 03 30 42 e0     sub        height,width,height
        80001a94 01 30 43 e2     sub        height,height,#0x1
        80001a98 03 a0 a0 e1     mov        r10,height
        80001a9c d8 30 9f e5     ldr        height,[PTR_DAT_80001b7c]                        = 80012154
        80001aa0 b0 32 d3 e1     ldrh       height,[height,#0x20]=>DAT_80012174
        80001aa4 03 20 a0 e1     mov        width,height
        80001aa8 24 30 1b e5     ldr        height,[r11,#local_28]
        80001aac 93 02 02 e0     mul        width,height,width
        80001ab0 20 30 1b e5     ldr        height,[r11,#local_24]
        80001ab4 03 90 82 e0     add        r9,width,height
        80001ab8 28 00 00 ea     b          LAB_80001b60
                             LAB_80001abc                                    XREF[1]:     80001b64(j) 
        80001abc b8 30 9f e5     ldr        height,[PTR_DAT_80001b7c]                        = 80012154
        80001ac0 0c 20 93 e5     ldr        width,[height,#0xc]=>DAT_80012160
        80001ac4 0a 30 a0 e1     mov        height,r10
        80001ac8 97 03 03 e0     mul        height,r7,height
        80001acc 03 30 89 e0     add        height,r9,height
        80001ad0 83 30 a0 e1     mov        height,height, lsl #0x1
        80001ad4 03 60 82 e0     add        r6,width,height
        80001ad8 9c 30 9f e5     ldr        height,[PTR_DAT_80001b7c]                        = 80012154
        80001adc 08 20 93 e5     ldr        width,[height,#0x8]=>DAT_8001215c
        80001ae0 89 30 a0 e1     mov        height,r9, lsl #0x1
        80001ae4 03 50 82 e0     add        r5,width,height
        80001ae8 0a 80 a0 e1     mov        r8,r10
        80001aec 10 00 00 ea     b          LAB_80001b34
                             LAB_80001af0                                    XREF[1]:     80001b3c(j) 
        80001af0 00 40 a0 e3     mov        r4,#0x0
        80001af4 06 00 00 ea     b          LAB_80001b14
                             LAB_80001af8                                    XREF[1]:     80001b1c(j) 
        80001af8 84 30 a0 e1     mov        height,r4, lsl #0x1
        80001afc 03 20 86 e0     add        width,r6,height
        80001b00 84 30 a0 e1     mov        height,r4, lsl #0x1
        80001b04 03 30 85 e0     add        height,r5,height
        80001b08 b0 20 d2 e1     ldrh       width,[width,#0x0]
        80001b0c b0 20 c3 e1     strh       width,[height,#0x0]
        80001b10 01 40 84 e2     add        r4,r4,#0x1
                             LAB_80001b14                                    XREF[1]:     80001af4(j) 
        80001b14 28 30 1b e5     ldr        height,[r11,#local_2c]
        80001b18 03 00 54 e1     cmp        r4,height
        80001b1c f5 ff ff 9a     bls        LAB_80001af8
        80001b20 87 30 a0 e1     mov        height,r7, lsl #0x1
        80001b24 03 50 85 e0     add        r5,r5,height
        80001b28 87 30 a0 e1     mov        height,r7, lsl #0x1
        80001b2c 03 60 86 e0     add        r6,r6,height
        80001b30 01 80 88 e2     add        r8,r8,#0x1
                             LAB_80001b34                                    XREF[1]:     80001aec(j) 
        80001b34 2c 30 1b e5     ldr        height,[r11,#local_30]
        80001b38 03 00 58 e1     cmp        r8,height
        80001b3c eb ff ff 9a     bls        LAB_80001af0
        80001b40 0a 20 a0 e1     mov        width,r10
        80001b44 0a 10 a0 e1     mov        ypos,r10
        80001b48 04 30 9b e5     ldr        height,[r11,#speed]
        80001b4c 91 03 03 e0     mul        height,ypos,height
        80001b50 23 3a a0 e1     mov        height,height, lsr #0x14
        80001b54 03 30 42 e0     sub        height,width,height
        80001b58 01 30 43 e2     sub        height,height,#0x1
        80001b5c 03 a0 a0 e1     mov        r10,height
                             LAB_80001b60                                    XREF[1]:     80001ab8(j) 
        80001b60 00 00 5a e3     cmp        r10,#0x0
        80001b64 d4 ff ff aa     bge        LAB_80001abc
        80001b68 00 00 a0 e1     mov        xpos,xpos
        80001b6c 00 00 a0 e1     mov        xpos,xpos
        80001b70 1c d0 4b e2     sub        sp,r11,#0x1c
        80001b74 f0 0f bd e8     ldmia      sp!,{r4 r5 r6 r7 r8 r9 r10 r11}=>local_20
        80001b78 1e ff 2f e1     bx         lr
                             PTR_DAT_80001b7c                                XREF[4]:     display_slide_top_rect_onto_scre
                                                                                          display_slide_top_rect_onto_scre
                                                                                          display_slide_top_rect_onto_scre
                                                                                          display_slide_top_rect_onto_scre
        80001b7c 54 21 01 80     addr       DAT_80012154


Think it is a good idea to adapt my display library to this change, so a bit of re-coding to do, but that is how it goes whilst learning.

Offline DavidAlfa

  • Super Contributor
  • ***
  • Posts: 5912
  • Country: es
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #927 on: July 31, 2021, 07:50:30 pm »
It's slow because a for/for/for loop drawing one pixel at a time is extremely unefficient!
Code: [Select]
      //Copy a single line to the screen buffer
      for(pixel=0;pixel<=width;pixel++)
      {
        //Copy one pixel at a time
        ptr1[pixel] = ptr2[pixel];
      }     


Instead, try something like:
Code: [Select]
    //Copy a single line to the screen buffer
    memcpy( ptr1, ptr2, 2*width);     // *2 because memcpy works with bytes, and you're copying uint16_t!

   // You might need to do this instead:
    memcpy((uint8_t*)ptr1, (uint8_t*)ptr2, 2*width);
« Last Edit: July 31, 2021, 08:08:12 pm by DavidAlfa »
Hantek DSO2x1x            Drive        FAQ          DON'T BUY HANTEK! (Aka HALF-MADE)
Stm32 Soldering FW      Forum      Github      Donate
 

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #928 on: July 31, 2021, 07:52:58 pm »
It's slow because a for/for/for loop drawing one pixel at a time is extremely unefficient!

That might be, but the original code does the same.

But an example on how to do it the right way is always welcomed.

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #929 on: July 31, 2021, 08:02:27 pm »
It's slow because a for/for/for loop drawing one pixel at a time is extremely unefficient!
Code: [Select]
      //Copy a single line to the screen buffer
      for(pixel=0;pixel<=width;pixel++)
      {
        //Copy one pixel at a time
        ptr1[pixel] = ptr2[pixel];
      }     


Instead, try something like:
Code: [Select]
    //Copy a single line to the screen buffer
    memcpy(ptr1[pixel], ptr2[pixel], 2*width);     // *2 because memcpy works with bytes, and you're copying uint16_t!

That is a good suggestion :-+ I will give it a try.

Offline DavidAlfa

  • Super Contributor
  • ***
  • Posts: 5912
  • Country: es
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #930 on: July 31, 2021, 08:02:34 pm »
Are you enabling the I cache and D cache? Setting the cpu to 600MHz? And dram to 156MHz?
Damn, I corrected the code too late, you had already quoted me! :-DD

Sure, the original code coming from china, what a suprise it's crap!
Same with the soldering stations, stock fw updating the screen at 5fps, extrenmely laggy system,  while I can easily do 50x that with cpu mostly idling!
« Last Edit: July 31, 2021, 08:06:17 pm by DavidAlfa »
Hantek DSO2x1x            Drive        FAQ          DON'T BUY HANTEK! (Aka HALF-MADE)
Stm32 Soldering FW      Forum      Github      Donate
 

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #931 on: July 31, 2021, 08:04:48 pm »
Yes the caches are enabled. CPU is running on 600MHz. Not sure about the memory though. I used code I found on the net and did not looked into it yet. So maybe it is set to slow.

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #932 on: July 31, 2021, 08:08:16 pm »
Damn, I corrected the code too late, you had already quoted me! :-DD

I did not even see the error in it. Just read the memcpy function name and thought yes, that is an option. Might be a problem that I'm not using standard lib.

Using "-T./fnirsi_1013d.ld -nostdlib -lgcc" this for the linker.

But that is for tomorrow. :=\

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #933 on: August 01, 2021, 04:49:11 am »
It's slow because a for/for/for loop drawing one pixel at a time is extremely unefficient!

I have been thinking about this and it is only true for architectures like ARM. For older CPU's like Z80, 6502, 8051 and maybe even the 8 bit AVR ones (not to familiar with those assembler wise) it makes no difference when the compiler optimizes.

On ARM the use of LDM and STM instructions will improve the efficiency since they can copy multiple words in a single instruction. The data still needs to be transferred over the bus though, and that is where bus width, and caching comes into play.

The first 15 years of my career I have written a lot of assembler for embedded devices using 6502, 6809 or 8051. Needed to be extremely efficient sometimes and shave of an instruction or two to get the wanted performance. We were talking about microseconds back in those days. Now it is nanoseconds or even less.

But it is nice to hear there are still true professionals out there who think about this. And not like what I have seen a lot, the idiots who think programming is easy and fully depend on their SDK's and whatever tools there are to make programs that are shit |O

I'm rusty and need to get the wheels going again. Forgotten a lot about proper programming in the last 15 years.

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #934 on: August 01, 2021, 09:23:28 am »
The memcpy function certainly helped. But I still have the feeling the original is faster :-//

Found an assembly source of a memcpy function, which allowed me to keep away from the standard library. The problem with the standard library is it needs the standard io implemented and I rather do without that.

Tried to compile with -O1, which reduces on code size, but the touchscreen then fails as expected. :palm:

Ah well that is for another day. Still a lot to do getting the rest of the code reversed.

Offline DavidAlfa

  • Super Contributor
  • ***
  • Posts: 5912
  • Country: es
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #935 on: August 01, 2021, 05:19:14 pm »
I usually compile with O2. The speed increase is noticeable.
Why does the optimization break the touchscreen?
Hantek DSO2x1x            Drive        FAQ          DON'T BUY HANTEK! (Aka HALF-MADE)
Stm32 Soldering FW      Forum      Github      Donate
 

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #936 on: August 01, 2021, 05:32:22 pm »
I usually compile with O2. The speed increase is noticeable.
Why does the optimization break the touchscreen?

The touch panel delay routine was an empty for loop. This is removed by the compiler when optimization is set to 1 or higher. Changed it to an assembly loop and now it works with optimization level O2.

The speed of the sliding in of the menu's is much better now. There will still be room for improvement, but the first goal is to get working code that mimics the scope.

And that is coming along, but still a lot of work. Put in the code for changing the screen brightness. It uses special functionality of the FPGA, for which I reversed the code a while back but did not test it until now. Happy to say that it works.

Offline DavidAlfa

  • Super Contributor
  • ***
  • Posts: 5912
  • Country: es
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #937 on: August 01, 2021, 05:39:16 pm »
Insert asm("nop"); in the loop and it won't be optimized
Hantek DSO2x1x            Drive        FAQ          DON'T BUY HANTEK! (Aka HALF-MADE)
Stm32 Soldering FW      Forum      Github      Donate
 
The following users thanked this post: pcprogrammer

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #938 on: August 02, 2021, 09:35:41 am »
The findings of the person who did the bashing of the FNIRSI-1013D did not even scratch the surface of what is wrong with it. (https://www.beis.de/Elektronik/FNIRSI-1013D/FNIRSI-1013D_en.html)

Going through the software to be able to reverse it showed me that in single trigger mode the time base can't go up above 10ms/div. For normal mode it won't go up above 50ms/div. :palm:

For getting a grasp on how they do the moving of the lines on the screen I connected channel 1 to the calibration output. Probe setting on x1 with the actual probe on x10 shows 3,17Vpp. Switching the setting to x10 raises the measured voltage to 31,7Vpp, even after a new auto set. Switching the setting to x100 shows 317Vpp.

Should have been 3,17Vpp for the probe on x10 setting since the actual probe is set as such |O (Assuming the voltage is actually 3.17Vpp)

Then I took pictures of it with the save pic button. See below for what it made of it when transferred to my computer. :-DD
On the scope itself they look ok.

Edit: Turns out the probe is crap. Tried the other one and that works as expected. But still the image stuff >:(
« Last Edit: August 02, 2021, 09:42:14 am by pcprogrammer »
 

Offline DavidAlfa

  • Super Contributor
  • ***
  • Posts: 5912
  • Country: es
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #939 on: August 02, 2021, 01:26:10 pm »
You could use the display front-end engine (Called DEBE in the datasheet) to merge different layers and tiles.
So you could copy the BMP as a tile and config the engine with an offset, timing the offset increase to provide a smooth effect.
You'll avoid copying the memory using CPU power.
« Last Edit: August 02, 2021, 01:38:57 pm by DavidAlfa »
Hantek DSO2x1x            Drive        FAQ          DON'T BUY HANTEK! (Aka HALF-MADE)
Stm32 Soldering FW      Forum      Github      Donate
 

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #940 on: August 02, 2021, 01:50:55 pm »
Hi David,

I did notice that DEFE (front end) device but since there is little documentation about it didn't consider using it yet. Certainly an option for later improvements where the scope signal stay's alive in the background whilst changing a setting. If that is possible of course.

Getting the scope up and running as is, is already a lot of work. Not yet time for big improvements :)

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #941 on: August 03, 2021, 02:00:22 pm »
It looks like the scope has some self destruct code in it :-DD

Not sure under what conditions it would be called upon but reading the code shows it would clear the first 1000 bytes of the program in the flash :palm:

Code: [Select]
                             LAB_8000a4f8                                    XREF[1]:     8000a4a0(j) 
        8000a4f8 48 00 9f e5     ldr        r0,[DAT_8000a548]                                = 80360BA8h
        8000a4fc fa 1f a0 e3     mov        r1,#0x3e8
        8000a500 dc d8 ff eb     bl         memclear                                         void memclear(uint address, uint
        8000a504 3c 10 9f e5     ldr        r1,[DAT_8000a548]                                = 80360BA8h
        8000a508 fa 2f a0 e3     mov        r2,#0x3e8
        8000a50c 27 0a a0 e3     mov        r0,#0x27000                                      Start address of program in flash
        8000a510 24 6b 00 eb     bl         write_to_flash                                   void write_to_flash(uint address
        8000a514 16 77 00 eb     bl         sys_init_watchdog                                void sys_init_watchdog(void)
        8000a518 a3 df 8d e2     add        sp,sp,#0x28c
        8000a51c f0 01 bd e8     ldmia      sp!,{r4 r5 r6 r7 r8}=>local_24
        8000a520 00 8e bd e8     ldmia      sp!,{r9 r10 r11 pc}

This is in a function that is called from the initialization part in the main function. FUN_8000a024.

Guess I will skip this one.

Implemented already most of the functions that interact with the FPGA. The input sensitivity can be adjusted. I can hear the relay's click :)
For some of the functions I still need to know more about the variables being used, so further investigation of the code is needed 8)

This means the reversal of the function that displays the trace data. I'm afraid it will take some time before there is something new to show. :popcorn:

Offline tv84

  • Super Contributor
  • ***
  • Posts: 3221
  • Country: pt
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #942 on: August 03, 2021, 02:21:06 pm »
Guess I will skip this one.

Don't agree.  You promised a full reversal! :-DD
 

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #943 on: August 03, 2021, 02:24:03 pm »
Guess I will skip this one.

Don't agree.  You promised a full reversal! :-DD

Well then I already failed since I skipped the whole display library and wrote my own :)

Offline DavidAlfa

  • Super Contributor
  • ***
  • Posts: 5912
  • Country: es
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #944 on: August 03, 2021, 09:50:23 pm »
Probably for bootloader updating  :-DD
Hantek DSO2x1x            Drive        FAQ          DON'T BUY HANTEK! (Aka HALF-MADE)
Stm32 Soldering FW      Forum      Github      Donate
 

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #945 on: August 05, 2021, 12:01:46 pm »
Sooner then expected an update :)

Found the functions for drawing the grid, pointers and cursors so implemented an interpretation of them in my code.

Also found more meaning on several of the FPGA commands. Still a lot of code to investigate, but hope to get trace data out soon. The rest is icing on the cake ;D

Will probably make changes to the displaying of the measurements, because I don't like the way they did it. Depending on how you make the settings the items for the channels become interleaved. My idea is to have channel 1 items on the left and channel 2 items on the right, and probably also in a fixed order.

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #946 on: August 07, 2021, 07:23:10 pm »
Going through the data capture code makes me wonder how it can even work just a little bit :palm:

For the longer time base settings, 100mS/div and up, it reads ten bytes from the fpga at a somewhat regular interval. It sums the bytes together and divides it by ten, so an average sample for maybe every x position on the screen. (Did not reach that part of the code yet) This explains the under sampling behavior that can be seen. Connect the probe to the 1KHz output and see what happens on these longer time bases. :-DD

For the shorter time base settings they read more bytes and for the two lowest settings (25 and 10nS/div) they read a second set of data, but for the first channel they write the second set over the first one. :o

Also the parameter ic connected to the fpga, on init they use it to read the time base setting that needs to be written to the fpga. Later on in the code they use a switch statement to get the value. The same goes for an adjustment parameter they use. In the long time base handling they use more or less fixed values from the code (setup on init). In the short time base handling they call the parameter ic with id 0x0B and get the exact same values. |O

I'm afraid a new programming of the fpga is needed to make it a proper scope. I have worked with fpga's before, but that is a long time ago and used schematic entry (Orcad) to design the hardware and DOS based XACT to convert it to a bitstream for a XC3042 :)

So more learning to do before that can happen. :(

Also still a lot to learn about the F1C100s, to make it all work better.

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #947 on: August 08, 2021, 11:18:15 am »
Found another whopper :-DD

The scope handles data different for time base setting 100mS/div and up, and here they forgot to adjust the signal magnitude when the sensitivity is changed from 50mV/div to 100mV/div (probe 1x), because yes for the shorter time base settings the data is multiplied by two in software.

So when you are looking at some dc signal on say 100mS/div the trace stays on the same height when the sensitivity is on the lowest or one but lowest setting :palm:

Offline frenky

  • Supporter
  • ****
  • Posts: 1003
  • Country: si
    • Frenki.net
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #948 on: August 09, 2021, 11:14:29 am »
When I was on approximately 20th page of this thread I got so excited that I ordered one on ebay.
Now that I got to the page 38 I'm not sure anymore that this was wise decision. :P
I just hope that you (and community) will be able to fix many hidden shortcomings of this scope. Today I have received it and It's an older model with protruding BNCs but it seems really nice. ;)
 

Online pcprogrammer

  • Super Contributor
  • ***
  • Posts: 3710
  • Country: nl
Re: FNIRSI-1013D "100MHz" tablet oscilloscope
« Reply #949 on: August 09, 2021, 12:08:03 pm »
When I was on approximately 20th page of this thread I got so excited that I ordered one on ebay.
Now that I got to the page 38 I'm not sure anymore that this was wise decision. :P
I just hope that you (and community) will be able to fix many hidden shortcomings of this scope. Today I have received it and It's an older model with protruding BNCs but it seems really nice. ;)

For simple measurements it is fine, but indeed it has many flaws |O

Fixing it will take its time. At the moment I'm plowing through the part that does the up sampling. Yes indeed another short coming of the design. For the 250nS/div down to 50nS/div (haven't reached the 25nS/div and 10nS/div code yet) they do up sampling.

The FPGA time base stay's the same from 500nS/div down, and for 250nS/div they insert an average sample in between every sample, so doubling the number of samples.
For the 100nS/div they insert 4 samples with a 1/5th delta step. For the 50nS/div it is 9 samples with a 1/10th delta step.

It is a hard job to decipher the Chinese logic with a Ghidra sauce :-//

Code: [Select]
//------------------------------------------------------------------------------------------------------------------------------------------------

void scope_pre_process_50ns_data(uint16 *buffer,uint offset,uint count)
{
  uint uVar1;
  ushort uVar2;
  longlong lVar3;
  longlong lVar4;
  int iVar5;
  ushort *puVar6;
  uint16 *puVar7;
  undefined2 *puVar8;
  uint uVar9;
  uint16 *puVar10;
  ushort *puVar11;
  undefined2 *puVar12;
  int iVar13;
  int iVar14;
  uint uVar15;
  ushort uVar16;
  uint uVar17;
  short sVar18;
  short *psVar19;
 
  uVar9 = count * DAT_8001360c;  //0x0000CCCD
  uVar15 = uVar9 >> 0x13;        // count / 10

  if (uVar15 != 0)
  {
    puVar7 = (uint16 *)((int)buffer + offset * 2);
    puVar8 = DAT_80013610;                                //0x801AEF16  ten samples before
    puVar10 = (uint16 *)((int)puVar7 + -2);

    if ((uVar15 & 1) != 0)
    {
      puVar8 = DAT_80013610 + 10;
      *puVar8 = *(undefined2 *)puVar7;
      puVar10 = puVar7;
    }

    uVar17 = (uVar15 << 0xf) >> 0x10;

    while (uVar17 != 0)
    {
      puVar8[10] = *(undefined2 *)((int)puVar10 + 2);
      puVar10 = (uint16 *)((int)puVar10 + 4);
      uVar17 = uVar17 - 1 & 0xffff;
      puVar8 = puVar8 + 0x14;
      *puVar8 = *(undefined2 *)puVar10;
    }
  }

  iVar5 = DAT_80013614;   //0x66666667
  uVar17 = 0;

  if (0 < (int)(uVar15 - 1))
  {
    do
    {
      puVar11 = (ushort *)(DAT_80013618 + uVar17 * 0x14);  //801AEF2A  start of buffer
      uVar2 = *puVar11;        //sample 1
      uVar16 = puVar11[10];    //sample 2

      if (uVar2 < uVar16)
      {
        uVar1 = (uint)(ushort)(uVar16 - uVar2);   //Positive delta
        iVar13 = 1;
        sVar18 = 4;
        puVar11 = puVar11 + 1;  //sample 2

        //one tenth step????
        *puVar11 = ((short)(int)((longlong)iVar5 * (longlong)(int)uVar1 >> 0x22) - (short)((longlong)iVar5 * (longlong)(int)uVar1 >> 0x3f)) + uVar2;

        do
        {
          lVar3 = (longlong)iVar5 * (longlong)(int)(uVar1 * (iVar13 + 2));  //3 tenths
          lVar4 = (longlong)iVar5 * (longlong)(int)(uVar1 * (iVar13 + 1));  //2 tenths
          sVar18 = sVar18 + -1;
          puVar11[1] = ((short)(int)(lVar4 >> 0x22) - (short)(lVar4 >> 0x3f)) + uVar2;
          iVar13 = iVar13 + 2;
          puVar11 = puVar11 + 2;
          *puVar11 = ((short)(int)(lVar3 >> 0x22) - (short)(lVar3 >> 0x3f)) + uVar2;
        } while (sVar18 != 0);
      }
      else
      {
        uVar1 = (uint)(ushort)(uVar2 - uVar16);
        iVar13 = 1;
        sVar18 = 4;
        puVar11 = puVar11 + 1;
        *puVar11 = uVar2 - ((short)(int)((longlong)iVar5 * (longlong)(int)uVar1 >> 0x22) - (short)((longlong)iVar5 * (longlong)(int)uVar1 >> 0x3f));

        do
        {
          lVar3 = (longlong)iVar5 * (longlong)(int)(uVar1 * (iVar13 + 2));
          lVar4 = (longlong)iVar5 * (longlong)(int)(uVar1 * (iVar13 + 1));
          sVar18 = sVar18 + -1;
          puVar11[1] = uVar2 - ((short)(int)(lVar4 >> 0x22) - (short)(lVar4 >> 0x3f));
          iVar13 = iVar13 + 2;
          puVar11 = puVar11 + 2;
          *puVar11 = uVar2 - ((short)(int)(lVar3 >> 0x22) - (short)(lVar3 >> 0x3f));
        } while (sVar18 != 0);
      }

      uVar17 = uVar17 + 1 & 0xfffeffff;
    } while ((int)uVar17 < (int)(uVar15 - 1));
  }

  uVar17 = uVar15 - 1;

  if (0 < (int)uVar17)
  {
    puVar8 = DAT_80013620;     //0x801B8B60
    puVar12 = DAT_8001361c;    //0x801AEF20

    if ((uVar17 & 1) != 0)
    {
      puVar12 = DAT_8001361c + 10;
      puVar8 = DAT_80013620 + 10;
      *puVar8 = *puVar12;
    }

    uVar17 = uVar17 * 0x8000 >> 0x10;  // /2

    while (uVar17 != 0)
    {
      puVar8[10] = puVar12[10];
      puVar12 = puVar12 + 0x14;
      uVar17 = uVar17 - 1 & 0xffff;
      puVar8 = puVar8 + 0x14;
      *puVar8 = *puVar12;
    }
  }

  uVar17 = 0;

  if (0 < (int)(uVar15 - 2))
  {
    do
    {
      iVar13 = DAT_80013624 + uVar17 * 0x14;
      uVar2 = *(ushort *)(iVar13 + 10);
      uVar16 = *(ushort *)(iVar13 + 0x1e);

      if (uVar2 < uVar16)
      {
        uVar1 = (uint)(ushort)(uVar16 - uVar2);
        iVar14 = 1;
        sVar18 = 4;
        psVar19 = (short *)(iVar13 + 0xc);
        *psVar19 = ((short)(int)((longlong)iVar5 * (longlong)(int)uVar1 >> 0x22) - (short)((longlong)iVar5 * (longlong)(int)uVar1 >> 0x3f)) + uVar2;

        do
        {
          lVar3 = (longlong)iVar5 * (longlong)(int)(uVar1 * (iVar14 + 1));
          lVar4 = (longlong)iVar5 * (longlong)(int)(uVar1 * (iVar14 + 2));
          sVar18 = sVar18 + -1;
          psVar19[1] = ((short)(int)(lVar3 >> 0x22) - (short)(lVar3 >> 0x3f)) + uVar2;
          iVar14 = iVar14 + 2;
          psVar19 = psVar19 + 2;
          *psVar19 = ((short)(int)(lVar4 >> 0x22) - (short)(lVar4 >> 0x3f)) + uVar2;
        } while (sVar18 != 0);
      }
      else
      {
        uVar1 = (uint)(ushort)(uVar2 - uVar16);
        iVar14 = 1;
        sVar18 = 4;
        psVar19 = (short *)(iVar13 + 0xc);
        *psVar19 = uVar2 - ((short)(int)((longlong)iVar5 * (longlong)(int)uVar1 >> 0x22) - (short)((longlong)iVar5 * (longlong)(int)uVar1 >> 0x3f));

        do
        {
          lVar3 = (longlong)iVar5 * (longlong)(int)(uVar1 * (iVar14 + 1));
          lVar4 = (longlong)iVar5 * (longlong)(int)(uVar1 * (iVar14 + 2));
          sVar18 = sVar18 + -1;
          psVar19[1] = uVar2 - ((short)(int)(lVar3 >> 0x22) - (short)(lVar3 >> 0x3f));
          iVar14 = iVar14 + 2;
          psVar19 = psVar19 + 2;
          *psVar19 = uVar2 - ((short)(int)(lVar4 >> 0x22) - (short)(lVar4 >> 0x3f));
        } while (sVar18 != 0);
      }

      uVar17 = uVar17 + 1 & 0xfffeffff;
    } while ((int)uVar17 < (int)(uVar15 - 2));
  }

  uVar15 = count >> 1;

  if (count != 0)
  {
    uVar17 = uVar15;
    puVar10 = (uint16 *)((int)buffer + -2);
    puVar11 = DAT_80013628;
    puVar6 = DAT_8001362c;

    if ((count & 1) != 0)
    {
      puVar11 = DAT_80013628 + 1;
      puVar6 = DAT_8001362c + 1;
      *(short *)buffer = (short)((uint)*puVar11 + (uint)*puVar6 >> 1);
      puVar10 = buffer;
    }

    while (uVar17 != 0)
    {
      *(short *)((int)puVar10 + 2) = (short)((uint)puVar11[1] + (uint)puVar6[1] >> 1);
      *(short *)(uint16 *)((int)puVar10 + 4) = (short)((uint)puVar11[2] + (uint)puVar6[2] >> 1);
      uVar17 = uVar17 - 1;
      puVar10 = (uint16 *)((int)puVar10 + 4);
      puVar11 = puVar11 + 2;
      puVar6 = puVar6 + 2;
    }
  }

  uVar9 = uVar9 >> 0x12;
  uVar17 = uVar9 - 1;

  if (0 < (int)uVar17)
  {
    puVar12 = (undefined2 *)((int)buffer + -6);
    puVar8 = DAT_80013630;

    if ((uVar17 & 1) != 0)
    {
      puVar12 = (undefined2 *)((int)buffer + 4);
      puVar8 = DAT_80013630 + 5;
      *puVar8 = *puVar12;
    }

    uVar17 = uVar17 * 0x8000 >> 0x10;

    while (uVar17 != 0)
    {
      puVar8[5] = puVar12[5];
      puVar12 = puVar12 + 10;
      uVar17 = uVar17 - 1 & 0xffff;
      puVar8 = puVar8 + 10;
      *puVar8 = *puVar12;
    }
  }

  uVar17 = DAT_80013638;
  uVar9 = uVar9 - 2;

  if (0 < (int)uVar9)
  {
    uVar9 = uVar9 & 0xffff;
    puVar11 = DAT_80013634;

    do
    {
      uVar2 = *puVar11;
      uVar16 = puVar11[5];

      if (uVar2 < uVar16)
      {
        uVar16 = uVar16 - uVar2;
        puVar11[1] = uVar2 + (ushort)((uint)uVar16 * DAT_8001360c >> 0x12);
        puVar11[2] = (short)(uint)((ulonglong)uVar16 * 2 * (ulonglong)uVar17 >> 0x22) + uVar2;
        sVar18 = (short)(uint)((ulonglong)uVar16 * 4 * (ulonglong)uVar17 >> 0x22);
        puVar11[3] = (short)(uint)((ulonglong)((uint)uVar16 * 3) * (ulonglong)uVar17 >> 0x22) + uVar2;
      }
      else
      {
        uVar16 = uVar2 - uVar16;
        puVar11[1] = uVar2 - (ushort)((uint)uVar16 * DAT_8001360c >> 0x12);
        puVar11[2] = uVar2 - (short)(uint)((ulonglong)uVar16 * 2 * (ulonglong)uVar17 >> 0x22);
        sVar18 = -(short)(uint)((ulonglong)uVar16 * 4 * (ulonglong)uVar17 >> 0x22);
        puVar11[3] = uVar2 - (short)(uint)((ulonglong)((uint)uVar16 * 3) * (ulonglong)uVar17 >> 0x22
                                          );
      }

      puVar11[4] = uVar2 + sVar18;
      uVar9 = uVar9 - 1 & 0xffff;
      puVar11 = puVar11 + 5;
    } while (uVar9 != 0);
  }

  if (count != 0)
  {
    puVar10 = (uint16 *)((int)buffer + -2);
    puVar11 = DAT_8001362c;

    if ((count & 1) != 0)
    {
      puVar11 = DAT_8001362c + 1;
      *(short *)buffer = (short)((uint)*(ushort *)buffer + (uint)*puVar11 >> 1);
      puVar10 = buffer;
    }

    if (uVar15 != 0)
    {
      do
      {
        puVar6 = (ushort *)((int)puVar10 + 2);
        uVar15 = uVar15 - 1;
        puVar10 = (uint16 *)((int)puVar10 + 4);
        *puVar6 = (ushort)((uint)*puVar6 + (uint)puVar11[1] >> 1);
        puVar11 = puVar11 + 2;
        *(ushort *)puVar10 = (ushort)((uint)*puVar11 + (uint)*(ushort *)puVar10 >> 1);
      } while (uVar15 != 0);

      return;
    }

    return;
  }

  return;
}

//------------------------------------------------------------------------------------------------------------------------------------------------


This is the code for the 50nS/div up sampling I'm working on at the moment. More is done in this code then for the 250nS/div or 100nS/div, so a bit of mystery to solve.

The other two functions can still be improved upon by handling the data from the end instead of the beginning. They use three separate actions, one for making an interleaved copy of the samples, a second one to fill in the averages or steps and a third one to copy it back to the original buffer. Can all be done in one loop, but that is improvement for later.


Share me

Digg  Facebook  SlashDot  Delicious  Technorati  Twitter  Google  Yahoo
Smf