From 635a3ca2a10b39cce7f067dfc16f909d9387612a Mon Sep 17 00:00:00 2001 From: jljusten Date: Tue, 14 May 2013 15:57:55 +0000 Subject: [PATCH] OvmfPkg: VirtioLib: populate the Available Ring correctly The descriptor table (also known as "queue") consists of descriptors. (The corresponding type in the code is VRING_DESC.) An individual descriptor describes a contiguous buffer, to be transferred uni-directionally between host and guest. Several descriptors in the descriptor table can be linked into a descriptor chain, specifying a bi-directional scatter-gather transfer between host and guest. Such a descriptor chain is also known as "virtio request". (The descriptor table can host sereval descriptor chains (in-flight virtio requests) in parallel, but the OVMF driver supports at most one chain, at any point in time.) The first descriptor in any descriptor chain is called "head descriptor". In order to submit a number of parallel requests (= a set of independent descriptor chains) from the guest to the host, the guest must put *only* the head descriptor of each separate chain onto the Available Ring. VirtioLib currently places the head of its one descriptor chain onto the Available Ring repeatedly, once for each single (head *or* dependent) descriptor in said descriptor chain. If the descriptor chain comprises N descriptors, this error amounts to submitting the same entire chain N times in parallel. Available Ring Descriptor table Ptr to head ----> Desc#0 (head of chain) Ptr to head --/ Desc#1 (next in same chain) ... / ... Ptr to head / Desc#(N-1) (last in same chain) Anatomy of a single virtio-blk READ request (a descriptor chain with three descriptors): virtio-blk request header, prepared by guest: VirtioAppendDesc PhysAddr=3FBC6050 Size=16 Flags=1 Head=1232 Next=1232 payload to be filled in by host: VirtioAppendDesc PhysAddr=3B934C00 Size=32768 Flags=3 Head=1232 Next=1233 host status, to be filled in by host: VirtioAppendDesc PhysAddr=3FBC604F Size=1 Flags=2 Head=1232 Next=1234 Processing on the host side -- the descriptor chain is processed three times in parallel (its head is available to virtqueue_pop() thrice); the same chain is submitted/collected separately to/from AIO three times: virtio_queue_notify vdev VDEV vq VQ#0 virtqueue_pop vq VQ#0 elem EL#0 in_num 2 out_num 1 bdrv_aio_readv bs BDRV sector_num 585792 nb_sectors 64 opaque REQ#0 virtqueue_pop vq VQ#0 elem EL#1 in_num 2 out_num 1 bdrv_aio_readv bs BDRV sector_num 585792 nb_sectors 64 opaque REQ#1 virtqueue_pop vq VQ#0 elem EL#2 in_num 2 out_num 1 bdrv_aio_readv bs BDRV sector_num 585792 nb_sectors 64 opaque REQ#2 virtio_blk_rw_complete req REQ#0 ret 0 virtio_blk_req_complete req REQ#0 status 0 virtio_blk_rw_complete req REQ#1 ret 0 virtio_blk_req_complete req REQ#1 status 0 virtio_blk_rw_complete req REQ#2 ret 0 virtio_blk_req_complete req REQ#2 status 0 On my Thinkpad T510 laptop with RHEL-6 as host, this probably leads to simultaneous DMA transfers targeting the same RAM area. Even though the source of each transfer is identical, the data is corrupted in the destination buffer -- the CRC32 calculated over the buffer varies, even though the origin of the transfers is the same, never rewritten LBA. SynchronousRequest Lba=585792 BufSiz=32768 ReqIsWrite=0 Crc32=BF68A44D The problem is invisible on my HP Z400 workstation. Fix the request submission by: - building the only one descriptor chain supported by VirtioLib always at the beginning of the descriptor table, - ensuring the head descriptor of this chain is put on the Available Ring only once, - requesting the virtio spec's language to be cleaned up . Available Ring Descriptor table Ptr to head ----> Desc#0 (head of chain) Desc#1 (next in same chain) ... Desc#(N-1) (last in same chain) VirtioAppendDesc PhysAddr=3FBC6040 Size=16 Flags=1 Head=0 Next=0 VirtioAppendDesc PhysAddr=3B934C00 Size=32768 Flags=3 Head=0 Next=1 VirtioAppendDesc PhysAddr=3FBC603F Size=1 Flags=2 Head=0 Next=2 virtio_queue_notify vdev VDEV vq VQ#0 virtqueue_pop vq VQ#0 elem EL#0 in_num 2 out_num 1 bdrv_aio_readv bs BDRV sector_num 585792 nb_sectors 64 opaque REQ#0 virtio_blk_rw_complete req REQ#0 ret 0 virtio_blk_req_complete req REQ#0 status 0 SynchronousRequest Lba=585792 BufSiz=32768 ReqIsWrite=0 Crc32=1EEB2B07 (The Crc32 was double-checked with edk2's and Linux's guest IDE driver.) Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Laszlo Ersek Reviewed-by: Jordan Justen git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@14356 6f19259b-4bc3-4df7-8a09-765794883524 --- OvmfPkg/Include/Library/VirtioLib.h | 27 +++++++------- OvmfPkg/Library/VirtioLib/VirtioLib.c | 52 ++++++++++++++++----------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/OvmfPkg/Include/Library/VirtioLib.h b/OvmfPkg/Include/Library/VirtioLib.h index 448dd2b578..3bebd9f2f4 100644 --- a/OvmfPkg/Include/Library/VirtioLib.h +++ b/OvmfPkg/Include/Library/VirtioLib.h @@ -140,8 +140,8 @@ VirtioRingUninit ( // request. // typedef struct { - UINT16 HeadIdx; - UINT16 NextAvailIdx; + UINT16 HeadDescIdx; + UINT16 NextDescIdx; } DESC_INDICES; @@ -169,9 +169,8 @@ VirtioPrepare ( Append a contiguous buffer for transmission / reception via the virtio ring. - This function implements the following sections from virtio-0.9.5: + This function implements the following section from virtio-0.9.5: - 2.4.1.1 Placing Buffers into the Descriptor Table - - 2.4.1.2 Updating the Available Ring Free space is taken as granted, since the individual drivers support only synchronous requests and host side status is processed in lock-step with @@ -199,13 +198,9 @@ VirtioPrepare ( In *Indices: - @param [in] HeadIdx The index identifying the head buffer (first - buffer appended) belonging to this same - request. - - @param [in out] NextAvailIdx On input, the index identifying the next - descriptor available to carry the buffer. On - output, incremented by one, modulo 2^16. + @param [in out] NextDescIdx On input, the index identifying the next + descriptor to carry the buffer. On output, + incremented by one, modulo 2^16. **/ VOID @@ -221,8 +216,8 @@ VirtioAppendDesc ( /** - Notify the host about appended descriptors and wait until it processes the - last one (ie. all of them). + Notify the host about the descriptor chain just built, and wait until the + host processes it. @param[in] PciIo The target virtio PCI device to notify. @@ -230,8 +225,10 @@ VirtioAppendDesc ( @param[in out] Ring The virtio ring with descriptors to submit. - @param[in] Indices The function waits until the host processes - descriptors up to Indices->NextAvailIdx. + In *Indices: + + @param[in] HeadDescIdx Identifies the head descriptor of the descriptor + chain. @return Error code from VirtioWrite() if it fails. diff --git a/OvmfPkg/Library/VirtioLib/VirtioLib.c b/OvmfPkg/Library/VirtioLib/VirtioLib.c index 8398c9db4c..db619eee15 100644 --- a/OvmfPkg/Library/VirtioLib/VirtioLib.c +++ b/OvmfPkg/Library/VirtioLib/VirtioLib.c @@ -306,8 +306,11 @@ VirtioPrepare ( // // Prepare for virtio-0.9.5, 2.4.1 Supplying Buffers to the Device. // - Indices->HeadIdx = *Ring->Avail.Idx; - Indices->NextAvailIdx = Indices->HeadIdx; + // Since we support only one in-flight descriptor chain, we can always build + // that chain starting at entry #0 of the descriptor table. + // + Indices->HeadDescIdx = 0; + Indices->NextDescIdx = Indices->HeadDescIdx; } @@ -315,9 +318,8 @@ VirtioPrepare ( Append a contiguous buffer for transmission / reception via the virtio ring. - This function implements the following sections from virtio-0.9.5: + This function implements the following section from virtio-0.9.5: - 2.4.1.1 Placing Buffers into the Descriptor Table - - 2.4.1.2 Updating the Available Ring Free space is taken as granted, since the individual drivers support only synchronous requests and host side status is processed in lock-step with @@ -345,13 +347,9 @@ VirtioPrepare ( In *Indices: - @param [in] HeadIdx The index identifying the head buffer (first - buffer appended) belonging to this same - request. - - @param [in out] NextAvailIdx On input, the index identifying the next - descriptor available to carry the buffer. On - output, incremented by one, modulo 2^16. + @param [in out] NextDescIdx On input, the index identifying the next + descriptor to carry the buffer. On output, + incremented by one, modulo 2^16. **/ VOID @@ -366,20 +364,18 @@ VirtioAppendDesc ( { volatile VRING_DESC *Desc; - Desc = &Ring->Desc[Indices->NextAvailIdx % Ring->QueueSize]; + Desc = &Ring->Desc[Indices->NextDescIdx++ % Ring->QueueSize]; Desc->Addr = BufferPhysAddr; Desc->Len = BufferSize; Desc->Flags = Flags; - Ring->Avail.Ring[Indices->NextAvailIdx++ % Ring->QueueSize] = - Indices->HeadIdx % Ring->QueueSize; - Desc->Next = Indices->NextAvailIdx % Ring->QueueSize; + Desc->Next = Indices->NextDescIdx % Ring->QueueSize; } /** - Notify the host about appended descriptors and wait until it processes the - last one (ie. all of them). + Notify the host about the descriptor chain just built, and wait until the + host processes it. @param[in] PciIo The target virtio PCI device to notify. @@ -387,8 +383,10 @@ VirtioAppendDesc ( @param[in out] Ring The virtio ring with descriptors to submit. - @param[in] Indices The function waits until the host processes - descriptors up to Indices->NextAvailIdx. + In *Indices: + + @param[in] HeadDescIdx Identifies the head descriptor of the descriptor + chain. @return Error code from VirtioWrite() if it fails. @@ -405,14 +403,26 @@ VirtioFlush ( IN DESC_INDICES *Indices ) { + UINT16 NextAvailIdx; EFI_STATUS Status; UINTN PollPeriodUsecs; + // + // virtio-0.9.5, 2.4.1.2 Updating the Available Ring + // + // It is not exactly clear from the wording of the virtio-0.9.5 + // specification, but each entry in the Available Ring references only the + // head descriptor of any given descriptor chain. + // + NextAvailIdx = *Ring->Avail.Idx; + Ring->Avail.Ring[NextAvailIdx++ % Ring->QueueSize] = + Indices->HeadDescIdx % Ring->QueueSize; + // // virtio-0.9.5, 2.4.1.3 Updating the Index Field // MemoryFence(); - *Ring->Avail.Idx = Indices->NextAvailIdx; + *Ring->Avail.Idx = NextAvailIdx; // // virtio-0.9.5, 2.4.1.4 Notifying the Device -- gratuitous notifications are @@ -439,7 +449,7 @@ VirtioFlush ( // PollPeriodUsecs = 1; MemoryFence(); - while (*Ring->Used.Idx != Indices->NextAvailIdx) { + while (*Ring->Used.Idx != NextAvailIdx) { gBS->Stall (PollPeriodUsecs); // calls AcpiTimerLib::MicroSecondDelay if (PollPeriodUsecs < 1024) {