Hi Wilco, On 12/23/22 19:35, Wilco Dijkstra wrote: > Hi Alex, (a) > >>       if (dst == end) >>           return end; >>       if (stp_unlikely(dst == NULL))  // Allow chaining with stpeprintf(). >>           return NULL; > >> Oh, and the two branches above can be optimized into a branch that returns dst. > > How? There will be 2 branches since you're doing 2 checks here... That is equivalent to: (b) if (dst == end) return dst; if (stp_unlikely(dst == NULL)) // Allow chaining with stpeprintf(). return dst; which itself is equivalent to: (c) if ((dst == end) || stp_unlikely(dst == NULL)) return dst; which still has a branch in ||, due to the short-circuit evaluation of the boolean operator. However, the compiler is allowed to transform it into a bitwise OR, since there are no side effects, no UB, and the result would be the same: (d) if ((dst == end) | stp_unlikely(dst == NULL)) return dst; which doesn't have a hidden branch. I tried GCC, and (b) and (c) produce the same assembly code, which is slightly different from (a) (almost identical; nothing significant). (d) produces considerably different assembly. I tried this under -O3 -march=native. I don't know enough assembly to judge which is better; I'll copy the results here for the curious. I guess the compiler prefers an extra branch here over unconditionally doing bitwise operations; it very likely knows more than I do. 
Cheers, Alex alx@asus5775:~/src/alx/libstp$ diff -u a.s b.s --- a.s 2022-12-23 23:27:57.788103834 +0100 +++ b.s 2022-12-23 23:28:13.463919271 +0100 @@ -59,9 +59,9 @@ .cfi_offset 3, -16 movq %rsi, %rbx cmpq %rsi, %rdi - je .L9 - testq %rdi, %rdi je .L12 + testq %rdi, %rdi + je .L13 movq %rbx, %rcx movq %rdx, %rsi xorl %edx, %edx @@ -79,7 +79,7 @@ .L11: .cfi_restore_state movb $0, -1(%rbx) -.L9: +.L12: movq %rbx, %rax popq %rbx .cfi_remember_state @@ -87,7 +87,7 @@ ret .p2align 4,,10 .p2align 3 -.L12: +.L13: .cfi_restore_state xorl %eax, %eax popq %rbx alx@asus5775:~/src/alx/libstp$ diff -u b.s c.s alx@asus5775:~/src/alx/libstp$ diff -u c.s d.s --- c.s 2022-12-23 23:29:07.315367548 +0100 +++ d.s 2022-12-23 23:28:58.007455133 +0100 @@ -11,16 +11,16 @@ .cfi_offset 3, -16 movq %rsi, %rbx cmpq %rsi, %rdi - je .L2 + je .L4 testq %rdi, %rdi - je .L5 + je .L4 movq %rbx, %rcx movq %rdx, %rsi xorl %edx, %edx subq %rdi, %rcx call memccpy@PLT testq %rax, %rax - je .L4 + je .L3 decq %rax popq %rbx .cfi_remember_state @@ -28,20 +28,19 @@ ret .p2align 4,,10 .p2align 3 -.L4: +.L3: .cfi_restore_state - movb $0, -1(%rbx) -.L2: movq %rbx, %rax + movb $0, -1(%rbx) popq %rbx .cfi_remember_state .cfi_def_cfa_offset 8 ret .p2align 4,,10 .p2align 3 -.L5: +.L4: .cfi_restore_state - xorl %eax, %eax + movq %rdi, %rax popq %rbx .cfi_def_cfa_offset 8 ret @@ -59,16 +58,16 @@ .cfi_offset 3, -16 movq %rsi, %rbx cmpq %rsi, %rdi - je .L12 + je .L10 testq %rdi, %rdi - je .L13 + je .L10 movq %rbx, %rcx movq %rdx, %rsi xorl %edx, %edx subq %rdi, %rcx call memccpy@PLT testq %rax, %rax - je .L11 + je .L9 decq %rax popq %rbx .cfi_remember_state @@ -76,20 +75,19 @@ ret .p2align 4,,10 .p2align 3 -.L11: +.L9: .cfi_restore_state - movb $0, -1(%rbx) -.L12: movq %rbx, %rax + movb $0, -1(%rbx) popq %rbx .cfi_remember_state .cfi_def_cfa_offset 8 ret .p2align 4,,10 .p2align 3 -.L13: +.L10: .cfi_restore_state - xorl %eax, %eax + movq %rdi, %rax popq %rbx .cfi_def_cfa_offset 8 ret > > Cheers, > Wilco --