WdfRequestUnmarkCanc?elable results in BSOD (continued)

I can’t seem to locate the original thread I started last month. So I’ll start a new one here.

I have an IOCTL based DMA driver that I wrote for interfacing with PCIe FPGAs. It’s for Win7 and built using the KMDF APIs. It works well and is reliable until I add support for IO cancellation. Once I add the WdfRequestMarkCancelableEx, WdfRequestUnmarkCancelable calls I get the BSOD .

The Minidump log when I have driver verifier turned on (as well as KMDF VerifyOn set to 1) is below.

CUSTOMER_CRASH_COUNT: 1

DEFAULT_BUCKET_ID: VERIFIER_ENABLED_VISTA_MINIDUMP

BUGCHECK_STR: 0x1E

PROCESS_NAME: svchost.exe

CURRENT_IRQL: 2

TAG_NOT_DEFINED_c000000f: FFFFF880032FFFB0

EXCEPTION_RECORD: fffff880032ff838 – (.exr 0xfffff880032ff838)
ExceptionAddress: fffff80002c7ccd0 (nt!DbgBreakPoint)
ExceptionCode: 80000003 (Break instruction exception)
ExceptionFlags: 00000000
NumberParameters: 1
Parameter[0]: 0000000000000000

TRAP_FRAME: fffff880032ff8e0 – (.trap 0xfffff880032ff8e0)
NOTE: The trap frame does not contain all registers.
Some register values may be zeroed or incorrect.
rax=fffff880032ffa78 rbx=0000000000000000 rcx=fffff98002aaafd0
rdx=fffff98002aaafd0 rsi=0000000000000000 rdi=0000000000000000
rip=fffff80002c7ccd1 rsp=fffff880032ffa78 rbp=fffff880032ffb02
r8=0000000000000000 r9=fffff88000ede610 r10=7efefefefefeff40
r11=8101010101010100 r12=0000000000000000 r13=0000000000000000
r14=0000000000000000 r15=0000000000000000
iopl=0 nv up ei pl nz na pe nc
nt!DbgBreakPoint+0x1:
fffff800`02c7ccd1 c3 ret
Resetting default scope

LAST_CONTROL_TRANSFER: from fffff80002c7c8be to fffff80002c84e90

STACK_TEXT:
fffff880032fe918 fffff80002c7c8be : fffffa80199bc580 fffff88000c0bf6b fffff880032ff090 fffff80002cb0a00 : nt!KeBugCheck
fffff880032fe920 fffff80002cb06cd : fffff80002e95398 fffff80002dce22c fffff80002c10000 fffff880032ff838 : nt!KiKernelCalloutExceptionHandler+0xe
fffff880032fe950 fffff80002caf4a5 : fffff80002dd62c4 fffff880032fe9c8 fffff880032ff838 fffff80002c10000 : nt!RtlpExecuteHandlerForException+0xd
fffff880032fe980 fffff80002cc0431 : fffff880032ff838 fffff880032ff090 fffff88000000000 00000000c0000010 : nt!RtlDispatchException+0x415
fffff880032ff060 fffff80002c84542 : fffff880032ff838 0000000000000000 fffff880032ff8e0 fffff98002aaae40 : nt!KiDispatchException+0x135
fffff880032ff700 fffff80002c82374 : 0000000000000000 fffff880032ff960 00000000c0000010 fffff98002aaae40 : nt!KiExceptionDispatch+0xc2
fffff880032ff8e0 fffff80002c7ccd1 : fffff88000efe0bd fffff98002aaafd0 fffff98002aaafd0 0000000000000000 : nt!KiBreakpointTrap+0xf4
fffff880032ffa78 fffff88000efe0bd : fffff98002aaafd0 fffff98002aaafd0 0000000000000000 fffff88000ede610 : nt!DbgBreakPoint+0x1
fffff880032ffa80 fffff88000eff691 : 0000000000000000 fffff98002aaae40 fffff98002aaae40 fffff9800c722e60 : Wdf01000!FxRequest::Vf_VerifyRequestIsCancelable+0x79
fffff880032ffad0 fffff88000e5dbcc : 0000000000000002 fffff9800c722e60 fffff880032ffb50 fffff88000000008 : Wdf01000!FxIoQueue::Vf_VerifyRequestCancelable+0x99
fffff880032ffb10 fffff88000e68685 : ffff9d59b1850b10 fffff9800c722e60 0000067ff38dd198 0000000000000022 : Wdf01000!FxIoQueue::RequestCancelable+0x3c
fffff880032ffb80 fffff880061a821b : fffff9800c722e60 0000067ffd4db1b8 fffff98002b24e40 fffffa801bf8049c : Wdf01000!imp_WdfRequestUnmarkCancelable+0x115
fffff880032ffbe0 fffff880061a6a22 : 0000067ff38dd198 fffffa801bf8049c fffff880061a9c70 fffffa801bf8030c : riffa!WdfRequestUnmarkCancelable+0x1b [c:\winddk\7600.16385.1\inc\wdf\kmdf\1.9\wdfrequest.h @ 775]
fffff880032ffc10 fffff880061a7d6c : fffffa801bf802d0 fffff80000000000 fffff88000000000 fffffa801bf8030c : riffa!RiffaCompleteRequest+0xa2 [c:\winddk\7600.16385.1\src\general\riffa\driver\windows\sys\riffa.c @ 1462]
fffff880032ffc70 fffff880061a5252 : fffffa801bf802d0 fffff88000000000 fffff88000000001 fffffa8000000000 : riffa!RiffaTransactionComplete+0x10c [c:\winddk\7600.16385.1\src\general\riffa\driver\windows\sys\riffa.c @ 1907]
fffff880032ffcc0 fffff88000ed96d3 : 0000067ffd4db1b8 0000057fe407ffd8 000000004e54241d 000000004559db6a : riffa!RiffaEvtInterruptDpc+0x622 [c:\winddk\7600.16385.1\src\general\riffa\driver\windows\sys\riffa.c @ 878]
fffff880032ffed0 fffff80002c907ac : fffff880032d0180 fffff880032d0180 fffff98002b24ed8 0000000000000042 : Wdf01000!FxInterrupt::DpcHandler+0xc3
fffff880032fff00 fffff80002c87925 : 0000000000000000 fffffa8019cdb300 0000000000000000 fffff88000ed9840 : nt!KiRetireDpcList+0x1bc
fffff880032fffb0 fffff80002c8773c : 0000000000000000 0000000000000000 0000000000000000 0000000000000000 : nt!KxRetireDpcList+0x5
fffff880063657e0 0000000000000000 : 0000000000000000 0000000000000000 0000000000000000 0000000000000000 : nt!KiDispatchInterruptContinue

STACK_COMMAND: kb

FOLLOWUP_IP:
riffa!WdfRequestUnmarkCancelable+1b [c:\winddk\7600.16385.1\inc\wdf\kmdf\1.9\wdfrequest.h @ 775]
fffff880`061a821b 4883c428 add rsp,28h

FAULTING_SOURCE_LINE: c:\winddk\7600.16385.1\inc\wdf\kmdf\1.9\wdfrequest.h

FAULTING_SOURCE_FILE: c:\winddk\7600.16385.1\inc\wdf\kmdf\1.9\wdfrequest.h

FAULTING_SOURCE_LINE_NUMBER: 775

FAULTING_SOURCE_CODE:
771: WDFREQUEST Request
772: )
773: {
774: return ((PFN_WDFREQUESTUNMARKCANCELABLE) WdfFunctions[WdfRequestUnmarkCancelableTableIndex])(WdfDriverGlobals, Request);

775: }
776:
777: //
778: // WDF Function: WdfRequestIsCanceled
779: //
780: typedef

SYMBOL_STACK_INDEX: c

SYMBOL_NAME: riffa!WdfRequestUnmarkCancelable+1b

FOLLOWUP_NAME: MachineOwner

MODULE_NAME: riffa

IMAGE_NAME: riffa.sys

DEBUG_FLR_IMAGE_TIMESTAMP: 54e4c333

FAILURE_BUCKET_ID: X64_0x1E_VRF_riffa!WdfRequestUnmarkCancelable+1b

BUCKET_ID: X64_0x1E_VRF_riffa!WdfRequestUnmarkCancelable+1b

Followup: MachineOwner


It seems to suggest that the call to WdfRequestUnmarkCancelable in my RiffaTransactionComplete function is to blame. OK. But my understanding is that I need to call WdfRequestUnmarkCancelable on every WDFREQUEST that I call WdfRequestMarkCancelable. According to the WdfRequestUnmarkCancelable MSDN page (https://msdn.microsoft.com/en-us/library/windows/hardware/ff550035(v=vs.85).aspx), this must be done everywhere except the EvtRequestCancel callback.

The general flow of things in my driver:

-> RiffaEvtIoDeviceControl()
// This is the entry point
-> RiffaIoctlSend()
// Save the WDFREQUEST in a field on the PDEVICE_EXTENSION object.
// Start a DMA transfer.
// Call WdfRequestMarkCancelableEx() with the WDFREQUEST and own EvtRequestCancel callback called RiffaEvtRequestCancel.

The driver waits for an interrupt to signal the end of the DMA transfer.

-> RiffaEvtInterruptDpc()
// Interrupt handler
-> RiffaTransactionComplete()
// Ends the DMA transfer, releases the DMA transaction object.
-> RiffaCompleteRequest()
// Gets WDFREQUEST off the PDEVICE_EXTENSION object. Checks if it’s null. If null the request was cancelled and this does nothing. If not null, sets the field on the PDEVICE_EXTENSION to null and calls WdfRequestUnmarkCancelable(), followed by WdfRequestCompleteWithInformation().

If ever invoked, the EvtRequestCancel callback, RiffaEvtRequestCancel, sets the WDFREQUEST field on the PDEVICE_EXTENSION to null and calls WdfRequestCompleteWithInformation().

It’s a pretty straightforward/simple driver modeled after the PLX9x5x driver example. I have yet been able to determine what’s causing this problem. The only thing I do know is that it only seems to happen when issuing a lot of back to back invocations. Stepping through invocations with a debugger seems to slow things down enough to prevent whatever’s going on.

I’ll post the relevant function bodies in a follow up post.

I’ll add a few extra detail and suspicions. The suspicions about possible causes stem from other somewhat related posts I’ve found. But as of yet, I’ve been unable to identify these ideas as the culprit.

  1. The driver runs flawlessly without WdfRequestMarkCancelable & WdfRequestUnmarkCancelable. With low frequency and high frequency back to back invocations. But I need to be able to cancel the IO request. So I’m stuck using WdfRequestMarkCancelable & WdfRequestUnmarkCancelable.

  2. Someone had a similar problem and discovered that the WDFREQUEST objects were being reused behind the scenes and that person’s driver was failing to call WdfRequestUnmarkCancelable on one of the code paths. His error was similar but was being raised when calling WdfRequestMarkCancelable on a WDFREQUEST that was presumably still marked cancelable. I don’t think this is what’s going on here, but knowing that the WDFREQUESTS are being used is useful.

  3. In my initial post on this, someone vaguely suggested that the WDFREQUEST might be non-null on the PDEVICE_EXTENSION but that somehow the “underlying request” object" might have been completed. I don’t see how this could happen as both code paths that complete this WDFREQUEST mark it as null before doing so. This suggestion was a bit vague, so I don’t really know what would cause such a situation. If you do, please enlighten me.

Any ideas would be appreciated. Thanks.

Matt

/**
* Handles IRP_MJ_DEVICE_CONTROL requests for IOCTL_RIFFA_SEND.
* .
* DevExt - Handle to the device extension object.
*
* Request - Handle to a framework request object.
*
* OutputBufferLength - Length of the request’s output buffer,
* if an output buffer is available.
*
* InputBufferLength - Length of the request’s input buffer,
* if an input buffer is available.
*/
VOID RiffaIoctlSend(IN PDEVICE_EXTENSION DevExt, IN WDFREQUEST Request,
IN size_t OutputBufferLength, IN size_t InputBufferLength) {
NTSTATUS status = STATUS_SUCCESS;
PREQUEST_EXTENSION reqExt;
PRIFFA_FPGA_CHNL_IO io;
UINT64 length;
PCHAR buf = NULL;
size_t bufSize;

// Input should be non-zero
if(!InputBufferLength) {
WdfRequestCompleteWithInformation(Request, STATUS_INVALID_PARAMETER, 0);
return;
}

// Get the input buffer
status = WdfRequestRetrieveInputBuffer(Request, 0, &buf, &bufSize);
if (!NT_SUCCESS(status)) {
WdfRequestCompleteWithInformation(Request, status, 0);
return;
}
if (bufSize < sizeof(RIFFA_FPGA_CHNL_IO)) {
WdfRequestCompleteWithInformation(Request, STATUS_INVALID_PARAMETER, 0);
return;
}
io = (PRIFFA_FPGA_CHNL_IO)buf;

// Validate the length, last, chnl
length = (io->Length < (OutputBufferLength>>2) ? io->Length : (OutputBufferLength>>2));
if (io->Last > 1) {
WdfRequestCompleteWithInformation(Request, STATUS_INVALID_PARAMETER, 0);
return;
}
if (io->Chnl >= DevExt->NumChnls) {
WdfRequestCompleteWithInformation(Request, STATUS_INVALID_PARAMETER, 0);
return;
}

// Check that this isn’t an already running transaction
if (InterlockedExchange(&DevExt->Chnl[io->Chnl].InUse, 1) == 1) {
WdfRequestCompleteWithInformation(Request, STATUS_INVALID_PARAMETER, 0);
return;
}

// Set the channel number in the request context for RiffaEvtRequestCancel.
reqExt = RiffaGetRequestContext(Request);
reqExt->Chnl = io->Chnl;

// Start a send transaction.
if (length) {
// Start a DMA transaction for sending.
DevExt->Chnl[io->Chnl].Timeout = io->Timeout;
DevExt->Chnl[io->Chnl].Length = (length<<2);
DevExt->Chnl[io->Chnl].SpillAfter = (length<<2);
DevExt->Chnl[io->Chnl].Offset = (io->Offset<<2);
DevExt->Chnl[io->Chnl].Last = io->Last;
DevExt->Chnl[io->Chnl].Provided = 0;
DevExt->Chnl[io->Chnl].ProvidedPrev = 0;
DevExt->Chnl[io->Chnl].Confirmed = 0;
DevExt->Chnl[io->Chnl].ConfirmedPrev = 0;
DevExt->Chnl[io->Chnl].ActiveCount = 0;
DevExt->Chnl[io->Chnl].Cancel = 0;
DevExt->Chnl[io->Chnl].Request = Request;
InterlockedExchange(&DevExt->Chnl[io->Chnl].ReqdDone, 0);
status = RiffaStartDmaTransaction(DevExt, io->Chnl, (length<<2),
0, WdfDmaDirectionWriteToDevice);
if (!NT_SUCCESS(status)) {
WdfRequestCompleteWithInformation(Request, status, 0);
return;
}
else {
// Mark the WDFREQUEST as cancellable.
status = WdfRequestMarkCancelableEx(Request, RiffaEvtRequestCancel);
if (!NT_SUCCESS(status)) {
WdfRequestCompleteWithInformation(Request, status, 0);
}
}
}
else if (io->Last) {
// Program the device for zero length send (device RX) and complete
RiffaProgramSend(DevExt, io->Chnl, (UINT32)length, io->Offset, io->Last);
WdfRequestCompleteWithInformation(Request, STATUS_SUCCESS, 0);
}
else {
// Invalid request, results in no send
WdfRequestCompleteWithInformation(Request, STATUS_INVALID_PARAMETER, 0);
}
}

/**
* Called after the last DMA transfer for the DMA transaction is completed.
* Sets the transferred bytes and completes the IOCTL call.
*
* DevExt - Pointer to the Device Extension
*
* Chnl - Channel number on which the DMA transaction takes place
*
* Transferred - Words transferred according to the hardware
*
* Status - Status after the last transfer has completed.
*/
VOID RiffaTransactionComplete(IN PDEVICE_EXTENSION DevExt, IN UINT32 Chnl,
IN UINT32 Transferred, IN NTSTATUS Status) {

// Stop the timer (if set)
WdfTimerStop(DevExt->Chnl[Chnl].Timer, FALSE);

// Check that the request has not been cancelled.
if (!RiffaThreadEnter(DevExt, Chnl))
return;

// Release the DMA Transaction
WdfDmaTransactionRelease(DevExt->Chnl[Chnl].DmaTransaction);

if (Chnl < RIFFA_MAX_NUM_CHNLS) {
// Update the total confirmed data
DevExt->Chnl[Chnl].Confirmed = (((UINT64)Transferred)<<2);

// Complete the send request
InterlockedExchange(&DevExt->Chnl[Chnl].InUse, 0);
RiffaCompleteRequest(DevExt, Chnl, Status);
}
else {
// Complete or repeat
if (DevExt->Chnl[Chnl].Last || !NT_SUCCESS(Status)) {
// Update the total confirmed data
DevExt->Chnl[Chnl].Confirmed = (((UINT64)Transferred)<<2);

// Complete the receive request
InterlockedExchange(&DevExt->Chnl[Chnl].InUse, 0);
RiffaCompleteRequest(DevExt, Chnl, Status);
}
else {
// Not the “last” transaction. Save the transferred amount.
DevExt->Chnl[Chnl].ConfirmedPrev += (((UINT64)Transferred)<<2);
DevExt->Chnl[Chnl].Confirmed = 0;
DevExt->Chnl[Chnl].ProvidedPrev = 0;
DevExt->Chnl[Chnl].Provided = 0;

// Stay in the kernel and start another receive DMA transaction.
// If an interrupt with transaction info has already been received,
// start the transaction. If not, set the “ready” bit.
if (InterlockedExchange(&DevExt->Chnl[Chnl].Ready, 1)) {
// Clear the “ready” bit and start the transaction
InterlockedExchange(&DevExt->Chnl[Chnl].Ready, 0);
RiffaStartRecvTransaction(DevExt, Chnl);
}
}
}

// Check for a cancel request and service it.
RiffaThreadExit(DevExt, Chnl);
}

/**
* Called when the WDFREQUEST object for the specified channel should be
* completed with the specified status. The WDFREQUEST has been marked
* cancelable so it must first be unmarked cancelable. After completion, the
* pointer to the WDFREQUEST is NULL’d out to indicate that the WDFREQUEST is
* no longer valid.
*
* DevExt - Pointer to the Device Extension
*
* Chnl - Channel number on which the DMA is taking place
*
* Status - NTSTATUS to set for completion
*/
VOID RiffaCompleteRequest(IN PDEVICE_EXTENSION DevExt, IN UINT32 Chnl,
IN NTSTATUS Status) {
NTSTATUS status;
WDFREQUEST request;

if ((request = DevExt->Chnl[Chnl].Request) != NULL) {
DevExt->Chnl[Chnl].Request = NULL;
// Try to complete the request
status = WdfRequestUnmarkCancelable(request);
if (status != STATUS_CANCELLED) {
// Complete the request (nobody has done it yet)
DevExt->Chnl[Chnl].Request = NULL;
WdfRequestCompleteWithInformation(request, Status,
(ULONG_PTR)(DevExt->Chnl[Chnl].ConfirmedPrev +
DevExt->Chnl[Chnl].Confirmed)>>2);
}
}
}

/**
* EvtRequestCancel handler for WDFREQUEST objects for IOCTL sends/receives.
* Called if the IO Manager or calling application needs to cancel the send.
* In practice this should only happen when the user application hangs and the
* user CTRL+C signals or Task Ends the application.
*
* Request - WDFREQUEST object from the IOCTL queue, representing the send
*/
VOID RiffaEvtRequestCancel(IN WDFREQUEST Request) {
PDEVICE_EXTENSION devExt;
PREQUEST_EXTENSION reqExt;
UINT32 chnl;
BOOLEAN canCancel;

devExt = RiffaGetDeviceContext(WdfIoQueueGetDevice(WdfRequestGetIoQueue(Request)));
reqExt = RiffaGetRequestContext(Request);

// Acquire the channel lock
WdfSpinLockAcquire(devExt->Chnl[reqExt->Chnl].SpinLock);

// See if we can cancel right now.
canCancel = (devExt->Chnl[reqExt->Chnl].ActiveCount == 0);
if (canCancel) {
// NULL out the request so that no other threads use it
devExt->Chnl[reqExt->Chnl].Request = NULL;
}
else {
// Set the cancel flag so that the last active thread cancels for us.
devExt->Chnl[reqExt->Chnl].Cancel = 1;
}

// Release the channel lock
WdfSpinLockRelease(devExt->Chnl[reqExt->Chnl].SpinLock);

// Cancel the request
if (canCancel) {
InterlockedExchange(&devExt->Chnl[reqExt->Chnl].Ready, 0);
InterlockedExchange(&devExt->Chnl[reqExt->Chnl].InUse, 0);
devExt->Chnl[reqExt->Chnl].ActiveCount = 0;
devExt->Chnl[reqExt->Chnl].Cancel = 0;
WdfDmaTransactionRelease(devExt->Chnl[reqExt->Chnl].DmaTransaction);
WdfTimerStop(devExt->Chnl[reqExt->Chnl].Timer, FALSE);
WdfRequestCompleteWithInformation(Request, STATUS_CANCELLED,
(ULONG_PTR)(devExt->Chnl[reqExt->Chnl].ConfirmedPrev +
devExt->Chnl[reqExt->Chnl].Confirmed)>>2);
}
}