I had to dig into a 16-year-old project for this.
- This is defined by the design of the Windows memory subsystem and file systems. The lock and IO hierarchy can only be as follows:
- Regular File IO and resources first.
- Paging File IO and resources second.
The Windows memory subsystem and file systems are designed for this hierarchy. That is why there are callbacks masquerading as FastIo routines, such as AcquireFileForNtCreateSection and AcquireForCcFlush: there was no other good place to add them. Reversing this hierarchy, recursively acquiring resources, or doing IO for two regular files will undoubtedly result in a deadlock. Even if it passes your tests, it will deadlock on some client's system with some 3rd party filters.
- The flag might not be defined for 3rd party drivers, but it can be defined and used.
//
// IO_OPEN_PAGING_FILE is not defined in older WDK headers, so provide
// a fallback definition here; the #ifndef guard leaves any definition
// supplied by the headers untouched.
// NOTE(review): 0x0002 is presumed to be the value the I/O manager
// expects for this option - confirm against a WDK that defines it.
//
#ifndef IO_OPEN_PAGING_FILE
#define IO_OPEN_PAGING_FILE 0x0002
#endif//IO_OPEN_PAGING_FILE
//
// Create the backup (shadow) file as a paging file. Opening it this
// way helps to avoid many synchronization issues between ordinary
// files and the backup file.
//
// The file is superseded on open, marked hidden + system, opened for
// synchronous non-buffered random access and deleted when the handle
// is closed. The result is returned in "status"; shadowFile, objAttr
// and ioStatusBlock are declared by the surrounding code (not shown).
//
status = IoCreateFile( &shadowFile, // FileHandle (out)
FILE_READ_DATA | FILE_WRITE_DATA | WRITE_DAC | SYNCHRONIZE, // DesiredAccess
&objAttr, // ObjectAttributes
&ioStatusBlock, // IoStatusBlock
NULL, // AllocationSize - none requested
FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM, // FileAttributes
FILE_SHARE_WRITE, // ShareAccess
FILE_SUPERSEDE, // Disposition
FILE_SYNCHRONOUS_IO_NONALERT | FILE_DELETE_ON_CLOSE | FILE_NON_DIRECTORY_FILE | // CreateOptions ...
FILE_RANDOM_ACCESS | FILE_NO_COMPRESSION | FILE_NO_INTERMEDIATE_BUFFERING, // ... CreateOptions (continued)
NULL, // EaBuffer - no extended attributes
0L, // EaLength
CreateFileTypeNone, // CreateFileType
NULL, // InternalParameters
IO_OPEN_PAGING_FILE | IO_NO_PARAMETER_CHECKING ); // Options: open as a paging file
- NTFS supports paging files.
- A mutex can be used. KeWaitForSingleObject can be called at IRQL <= APC_LEVEL. Just be careful about recursive acquisitions: they must be handled carefully to keep the protected structures consistent.
- Memory manager and IO manager do not provide the same level of serialisation for IO in case of paging file IO, so this might result in data integrity issues if IO is not properly serialised.
FYI, here is some code I used to read files opened as paging files:
//
// Prepare the MDL for reuse. For synchronous paging IO requests
// the MDL is not freed on completion (unlike ordinary requests),
// so the same partial MDL is recycled for every read.
//
MmPrepareMdlForReuse( ReaderThread->LocalBuffer.PartialMdl );
//
// Describe the first BytesToRead bytes of the local buffer with the partial MDL
//
IoBuildPartialMdl( ReaderThread->LocalBuffer.Mdl,
ReaderThread->LocalBuffer.PartialMdl,
(PCHAR)ReaderThread->LocalBuffer.Address,
BytesToRead );
//
// The buffer described by the MDL must start at a zero byte offset
// (i.e. be aligned) to avoid any nasty surprises for file systems
//
DLD_ASSERT_BUGCHECK( 0x0 == MmGetMdlByteOffset( ReaderThread->LocalBuffer.PartialMdl ) );
//
// The MDL must describe a system space address, not a user space one!
//
DLD_ASSERT_BUGCHECK( (ULONG_PTR)MmGetMdlBaseVa( ReaderThread->LocalBuffer.PartialMdl ) > MAXIMUM_USER_SPACE_ADDRESS );
//
// The paging read is issued between the acquire/release pair below.
// NOTE(review): DldIoSynchronousPageReadWrite is not shown here; it is
// presumably a wrapper over DldIoSynchronousPageReadWriteSpecifyDeviceObjectHint
// and the trailing FALSE presumably selects a read (IsWrite) - confirm.
//
DldAcquireBuffersFileForRead( FileHandle );
{ // start of the lock
RC = DldIoSynchronousPageReadWrite( FileHandle->FileObject,
ReaderThread->LocalBuffer.PartialMdl,
&StartingOffset,
&IoStatusBlock,
FALSE );
} // end of the lock
DldReleaseBuffersFileFromRead( FileHandle );
//
// Prepare the MDL for reuse again now that the IO is done. For
// synchronous paging IO requests the MDL is not freed on completion
// (unlike ordinary requests).
//
MmPrepareMdlForReuse( ReaderThread->LocalBuffer.PartialMdl );
The IO was implemented as follows:
NTSTATUS
DldIoSynchronousPageReadWriteSpecifyDeviceObjectHint(
    __in PFILE_OBJECT FileObject,
    __in PMDL MemoryDescriptorList,
    __in ULONG BufferSize, // if 0x0 the size is inferred from the MDL
    __in PLARGE_INTEGER StartingOffset,
    __out PIO_STATUS_BLOCK IoStatusBlock,
    __in BOOLEAN IsWrite,
    __in_opt PDEVICE_OBJECT FsdStackObject,
    __in BOOLEAN DataStreamCanBeCached
    )
/*++

Routine Description:

    Builds a synchronous, non-cached paging IO request (IRP_MJ_READ or
    IRP_MJ_WRITE) for the pages described by the MDL, sends it to the
    file system stack and waits until the request has been completed.

    The caller must free the MemoryDescriptorList!

Arguments:

    FileObject - A pointer to a referenced file object describing which file
        the read or write should be performed on.

    MemoryDescriptorList - An MDL which describes the physical pages that
        are read into or written from. All of the pages must have been
        locked in memory.
        ATTENTION! This MDL will not be freed during the IRP completion, the
        caller is responsible for freeing the MDL.

    BufferSize - The length of the transfer in bytes. If 0x0, the length is
        taken from the MDL. Must not exceed the MDL byte count.

    StartingOffset - Pointer to the offset in the file at which the transfer
        should start.

    IoStatusBlock - A pointer to the I/O status block in which the final status
        and information should be stored.

    IsWrite - TRUE to issue a write, FALSE to issue a read.

    FsdStackObject - if not NULL the request will be sent to the specified
        device, else the upper device on the file object's stack is used.

    DataStreamCanBeCached - TRUE if the caller understands that the data can
        be cached and the data in the cache may differ from the data on disk.

Return Value:

    STATUS_INVALID_PARAMETER - a required pointer is NULL or BufferSize is
        larger than the MDL byte count; no request was sent.

    STATUS_INSUFFICIENT_RESOURCES - the IRP could not be allocated.

    Otherwise the status returned by IoCallDriver or, if the request was
    pended, the final status taken from IoStatusBlock after completion.

--*/
{
    NTSTATUS RC;
    PIRP irp;
    PIO_STACK_LOCATION irpSp;
    PDEVICE_OBJECT deviceObject;
    KEVENT Event;

    DLD_ASSERT( KeGetCurrentIrql() <= APC_LEVEL );
    DLD_ASSERT( IoStatusBlock );
    DLD_ASSERT( MemoryDescriptorList );
    DLD_ASSERT( StartingOffset );

    //
    // The asserts above are presumably compiled out in free builds, so
    // reject the NULL pointers explicitly instead of dereferencing them
    // ( or handing them to the I/O manager ) later on
    //
    if( NULL == FileObject ||
        NULL == MemoryDescriptorList ||
        NULL == StartingOffset ||
        NULL == IoStatusBlock ){

        return STATUS_INVALID_PARAMETER;
    }

    DLD_ASSERT( !( FALSE == DataStreamCanBeCached && CcIsFileCached(FileObject) ) );
    DLD_ASSERT( 0x0 != MmGetMdlByteCount( MemoryDescriptorList ) );

    if( 0x0 == BufferSize )
        BufferSize = MmGetMdlByteCount( MemoryDescriptorList );

    DLD_ASSERT( BufferSize <= MmGetMdlByteCount( MemoryDescriptorList ) );

    //
    // A transfer longer than the MDL would make the lower drivers touch
    // pages the MDL does not describe, so fail the request up front
    //
    if( BufferSize > MmGetMdlByteCount( MemoryDescriptorList ) ){

        IoStatusBlock->Status = STATUS_INVALID_PARAMETER;
        IoStatusBlock->Information = 0x0;
        return STATUS_INVALID_PARAMETER;
    }

    //
    // The event is signaled when the request is completed, see the
    // wait after IoCallDriver
    //
    KeInitializeEvent( &Event, SynchronizationEvent, FALSE );

    //
    // Begin by getting a pointer to the device object the request
    // will be sent to.
    //
    if( NULL == FsdStackObject )
        deviceObject = IoGetRelatedDeviceObject( FileObject );
    else
        deviceObject = FsdStackObject;

    //
    // Allocate an I/O Request Packet (IRP) for this paging operation.
    //
    irp = IoAllocateIrp( deviceObject->StackSize, FALSE );
    if (!irp) {

        //
        // do not leave the caller's status block uninitialized
        //
        IoStatusBlock->Status = STATUS_INSUFFICIENT_RESOURCES;
        IoStatusBlock->Information = 0x0;
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    //
    // Get a pointer to the first stack location in the packet. This location
    // will be used to pass the function codes and parameters to the first
    // driver.
    //
    irpSp = IoGetNextIrpStackLocation( irp );

    //
    // Fill in the IRP according to this request. This code relies on the
    // I/O manager storing the final status in UserIosb and signaling
    // UserEvent when the synchronous paging request is completed.
    //
    irp->MdlAddress = MemoryDescriptorList;
    irp->Flags = IRP_PAGING_IO | IRP_NOCACHE | IRP_SYNCHRONOUS_PAGING_IO;
    irp->RequestorMode = KernelMode;
    irp->UserIosb = IoStatusBlock;
    irp->UserEvent = &Event;
    irp->UserBuffer = (PVOID) ((PCHAR) MemoryDescriptorList->StartVa + MemoryDescriptorList->ByteOffset);
    irp->Tail.Overlay.OriginalFileObject = FileObject;
    irp->Tail.Overlay.Thread = PsGetCurrentThread();

    //
    // Fill in the read or write parameters.
    //
    irpSp->FileObject = FileObject;

    if( IsWrite ){

        irpSp->MajorFunction = IRP_MJ_WRITE;
        irpSp->Parameters.Write.Length = BufferSize;
        irpSp->Parameters.Write.ByteOffset = *StartingOffset;

    } else {

        irpSp->MajorFunction = IRP_MJ_READ;
        irpSp->Parameters.Read.Length = BufferSize;
        irpSp->Parameters.Read.ByteOffset = *StartingOffset;
    }

    //
    // set the completion routine which will zero
    // the MDL pointer on completion
    //
    IoSetCompletionRoutine( irp,
                            DldPaginReadWriteCompletion,
                            NULL,
                            TRUE, TRUE, TRUE );

    //
    // Send the packet to the chosen device object.
    //
    RC = IoCallDriver( deviceObject, irp );

    //
    // Wait for the IRP completion, the final status
    // is then found in the caller's status block
    //
    if( STATUS_PENDING == RC ){

        KeWaitForSingleObject( &Event,
                               Executive,
                               KernelMode,
                               FALSE,
                               NULL );

        RC = IoStatusBlock->Status;
    }

    DLD_ASSERT( NT_SUCCESS( RC ) || STATUS_END_OF_FILE == RC );

    return RC;
}
//---------------------------------------------------------------------
NTSTATUS
DldPaginReadWriteCompletion(
    IN PDEVICE_OBJECT DeviceObject,
    IN PIRP Irp,
    IN PVOID Context
    )
/*++

Routine Description:

    Completion routine for the paging read/write IRPs built by this
    module. It detaches the MDL from the IRP so that the MDL stays
    owned by the request's initiator.

Arguments:

    DeviceObject - not used.

    Irp - the request being completed.

    Context - not used.

Return Value:

    Always STATUS_SUCCESS ( i.e. not STATUS_MORE_PROCESSING_REQUIRED ),
    so the completion processing of the IRP goes on.

--*/
{
    //
    // The routine is registered by the initiator of the request,
    // so at this point the IRP is expected to be above its topmost
    // stack location and there is no valid current location in
    // which the IRP could be marked as pending.
    //
    DLD_ASSERT( ( Irp->StackCount + 0x1 ) == Irp->CurrentLocation );

    //
    // A safety net for the case when the routine has been set by
    // mistake for an IRP created by someone else: propagate the
    // pending flag only if a valid current stack location exists.
    //
    if( Irp->PendingReturned ){

        if( Irp->CurrentLocation <= Irp->StackCount ){

            IoMarkIrpPending( Irp );
        }
    }

    //
    // No known kernel frees the MDL of a paging IO IRP, but this
    // might change one day, so drop the pointer to make sure the
    // kernel never frees the caller's MDL.
    //
    Irp->MdlAddress = NULL;

    return STATUS_SUCCESS;
}