Description
Bug report
Bug description:
This is about 3.14.0b1 vs 3.13.1, free-threaded builds in both cases.
Microsoft Windows [Version 10.0.19045.4529]
I run and maintain an IOCP server in python 3.13.1t.
There are no 3rd party libraries being used.
The Problem: Using the exact same code, running 3.14t vs 3.13t cuts the throughput in half.
I've made a badly written, working benchmark, extracted/simplified from my IOCP server.
Server:
from ctypes import windll,create_string_buffer,c_void_p,c_ulong,c_ulonglong,Structure,byref,cast,addressof,POINTER,c_char
from ctypes.wintypes import DWORD,HANDLE
# Resolve every kernel32 entry point once; the loops below call these
# module-level names directly. No argtypes/restype are declared, so ctypes
# applies its default argument conversion and int return type.
kernel32 = windll.kernel32

# Named-pipe creation and connection.
CreateNamedPipeW = kernel32.CreateNamedPipeW
ConnectNamedPipe = kernel32.ConnectNamedPipe

# I/O completion port management. The local alias spells "IO" in capitals,
# while the exported Win32 symbol is CreateIoCompletionPort.
CreateIOCompletionPort = kernel32.CreateIoCompletionPort
GetQueuedCompletionStatusEx = kernel32.GetQueuedCompletionStatusEx

# Data transfer and error reporting.
ReadFile = kernel32.ReadFile
GLE = kernel32.GetLastError
class OVERLAPPED(Structure):
    """Per-operation record passed to the overlapped Win32 calls.

    Fields are named by position ("0" .. "8"), so they are reachable only
    through ``getattr``. The first five match the layout of the Win32
    OVERLAPPED structure; the remaining four are extra pointer-sized slots
    that nothing in this benchmark reads or writes.
    """

    _fields_ = (
        ("0", c_void_p),  # Internal (ULONG_PTR)
        ("1", c_void_p),  # InternalHigh (ULONG_PTR)
        ("2", DWORD),     # Offset
        ("3", DWORD),     # OffsetHigh
        ("4", c_void_p),  # hEvent
        # Trailing slots beyond the Win32 layout -- presumably user data kept
        # from the full server; TODO confirm with the author.
        ("5", c_void_p),
        ("6", c_void_p),
        ("7", c_void_p),
        ("8", c_void_p),
    )
# Ten overlapped records are allocated, but byref() of the array addresses its
# first element, so every call that receives __Overlapped shares record 0.
Overlapped = (OVERLAPPED * 10)()
__Overlapped = byref(Overlapped)

# Create a fresh completion port: INVALID_HANDLE_VALUE (-1) with no existing
# port, completion key 0, and a concurrency value of 4.
IOCP = CreateIOCompletionPort(HANDLE(-1), None, 0, 4)

# dwOpenMode: PIPE_ACCESS_INBOUND (1) | FILE_FLAG_OVERLAPPED (0x40000000).
flag1 = 1 | 1073741824
# dwPipeMode: PIPE_TYPE_MESSAGE (4) | PIPE_READMODE_MESSAGE (2) | PIPE_WAIT (0)
#             | PIPE_REJECT_REMOTE_CLIENTS (8).
flag2 = 4 | 2 | 0 | 8
# 255 = PIPE_UNLIMITED_INSTANCES, 32-byte out buffer, 0-byte in buffer,
# default timeout, default security attributes.
Pipe = CreateNamedPipeW("\\\\.\\pipe\\IOCPBenchMark", flag1, flag2, 255, 32, 0, 0, None)

# Attach the pipe to the completion port under completion key 1.
if not CreateIOCompletionPort(Pipe, IOCP, 1, 0):
    print("ERROR!")

# Single receive buffer shared by every ReadFile call.
ReadBuffer = create_string_buffer(1024)
__ReadBuffer = byref(ReadBuffer)

# Output array for GetQueuedCompletionStatusEx. Every caller asks for up to
# 255 entries and an OVERLAPPED_ENTRY is 32 bytes on 64-bit Windows, so the
# buffer must hold 255 entries; the previous 32*128 bytes covered only 128.
OverlapEntries = create_string_buffer(32 * 255)
ove = byref(OverlapEntries)

# Receives the number of entries actually dequeued.
Completed = c_ulong(0)
__Completed = byref(Completed)
def __IOCPThread():
    """Worker loop: dequeue a completion, then post the next pipe read.

    Runs forever and never returns. All workers share the module-level
    completion-entry buffer, read buffer and overlapped record.
    """
    while True:
        # Timeout 0 makes this a busy poll: the call returns immediately and
        # is retried until at least one completion packet is dequeued.
        while not GetQueuedCompletionStatusEx(IOCP, ove, 255, __Completed, 0, False):
            continue
        # Re-arm the pipe with another overlapped read of up to 32 bytes.
        ReadFile(Pipe, __ReadBuffer, 32, None, __Overlapped)
from threading import Thread
from time import sleep

# Four workers, created now but started only after the first read is posted.
Threads = [Thread(target=__IOCPThread) for _ in range(4)]

# Begin an overlapped connect. A zero return with last-error 997
# (ERROR_IO_PENDING) is the expected "still waiting for a client" outcome;
# any other last-error value is reported.
# NOTE(review): a client that connects before this call would yield
# ERROR_PIPE_CONNECTED (535) and be reported as "ERROR 2" -- confirm whether
# that case matters for the benchmark.
success = ConnectNamedPipe(Pipe, __Overlapped)
if not success:
    if GLE() != 997:
        print("ERROR 2")

# Wait for the connect completion packet, polling with a 1 ms timeout.
while not GetQueuedCompletionStatusEx(IOCP, ove, 255, __Completed, 1, False):
    continue
print("Connected.")

# Post the first read so the workers have a completion to wait for, then
# start them.
ReadFile(Pipe, __ReadBuffer, 32, None, __Overlapped)
for t in Threads:
    t.start()

# Idle in the main thread; the process has to be killed manually.
while True:
    sleep(1)
Client:
from ctypes import windll,c_char_p,byref
from ctypes.wintypes import DWORD
from time import perf_counter as pfc
# Resolve the kernel32 entry points used by the client once.
kernel32 = windll.kernel32
CreateFileW = kernel32.CreateFileW
WriteFile = kernel32.WriteFile
GLE = kernel32.GetLastError

# Receives the byte count reported by each WriteFile call.
written = DWORD()
__written = byref(written)

print(GLE())
GENERIC_WRITE = 1073741824  # 0x40000000
# Open the server's pipe: no sharing, default security, 3 = OPEN_EXISTING,
# no flags, no template file.
Pipe = CreateFileW("\\\\.\\pipe\\IOCPBenchMark", GENERIC_WRITE, 0, None, 3, 0, None)
# Judge success by the returned handle rather than by GetLastError():
# INVALID_HANDLE_VALUE arrives as -1 through ctypes' default int return type,
# and the last-error value is not a reliable success indicator.
if Pipe != -1:
    print("Connected.")

test = b"test"
t = pfc() + 1  # deadline for the current one-second measurement window
while True:
    for Count in range(1000000):
        # Four writes per iteration, deliberately unrolled; each failure is
        # reported with the current last-error code.
        if not WriteFile(Pipe, test, 4, __written, None):
            print("ERROR ", GLE())
        if not WriteFile(Pipe, test, 4, __written, None):
            print("ERROR ", GLE())
        if not WriteFile(Pipe, test, 4, __written, None):
            print("ERROR ", GLE())
        if not WriteFile(Pipe, test, 4, __written, None):
            print("ERROR ", GLE())
        # Once a second: print the write count for the window (4 per
        # iteration) and restart the counter by leaving the for loop.
        if pfc() >= t:
            t = pfc() + 1
            print(Count * 4)
            break
The server uses 4 threads. If you don't see any output, try reducing the number of threads.
When I use 8 threads (on my 8-core machine), I don't get any output. Duh.
No, SMT threads don't count for anything here.
Each script runs in its own cmd.exe window.
Please be aware that you'll have to kill the server-process manually.
I wanted to add a call to "taskkill /F /IM python*",
but then realized I might cause someone big trouble with that.
>python3.13t server.py
:
Client output:
205536
207128
206764
206504
204768
>python3.14t server.py
:
Client output:
107468
105516
106032
107492
108472
Perplexity suggested I should post this here,
because this is a use-case you people might be interested in.
Thank you.
CPython versions tested on:
3.14
Operating systems tested on:
Windows