WP7 doesn't allow custom shader code at the moment, so you might think that getting something like instancing to work would be impossible. Luckily, it's pretty easy to append BoneIndices and BoneWeights to an existing vertex buffer so that it can be used with a SkinnedEffect shader.

Shader instancing uses the same shader code as skinning. By setting the number of bones per vertex to one (SkinnedEffect.WeightsPerVertex = 1) and setting the bone weight to 1.0 (100%), you can use each bone as the transformation matrix for one of your instances.

The basic idea is as follows:

  • Get the original model's vertex buffer
  • Calculate the new vertex stride with an extra Byte4 for the BoneIndices and a Vector4 for the BoneWeights
  • Create a new vertex buffer large enough for the maximum number of instances per draw call, using the original model's vertex count and the new stride size
  • Populate this new vertex buffer by replicating the vertex data for each instance, setting the correct bone index for each instance and setting the bone weight to 1.0
  • Do the same for the IndexBuffer - create an index buffer large enough for all instances and replicate the data for each instance

To draw the instances, pass an array of transformation matrices as the bones of a SkinnedEffect shader, with the number of bones per vertex (WeightsPerVertex) set to one.

You can see the code below. Most of it has been taken from the instancing sample on creators.xna.com, with modifications for XNA 4 and SkinnedEffect. I've also moved the code out of the Content Pipeline and into a class that you use by simply passing in an existing Model and a GraphicsDevice. It might not be perfect, but it should be pretty easy to adapt to your needs. Note that the code is deliberately simple and naive: it expects your model to have a single mesh with a single mesh part and only one texture. This is fine for my simple models, but if you need something more complex you'll have to tweak the code a bit.

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using Microsoft.Xna.Framework.Graphics;

using Microsoft.Xna.Framework;


namespace DataTypes
{
    /// <summary>
    /// Draws many copies of a single-mesh, single-part <see cref="Model"/> in as few
    /// draw calls as possible by abusing <see cref="SkinnedEffect"/>: the model's
    /// geometry is replicated once per instance, every replicated vertex is tagged
    /// with its instance number as a bone index, and the per-instance world
    /// matrices are uploaded as the effect's bone transforms.
    /// </summary>
    /// <remarks>
    /// Only <c>Meshes[0].MeshParts[0]</c> of the model is used, and the whole of that
    /// part's vertex and index buffers is replicated (the part's vertex offset and
    /// start index are assumed to be zero - verify for models with several parts).
    /// </remarks>
    public class InstancedModel
    {
        // Upper bound on instances per draw call; one bone matrix is consumed per
        // instance, so this must not exceed SkinnedEffect.MaxBones.
        const int maxShaderMatrices = 60;

        // Bytes appended to every vertex: a Byte4 of bone indices and a Vector4 of weights.
        const int boneIndicesSize = sizeof(byte) * 4;
        const int boneWeightsSize = sizeof(float) * 4;

        VertexBuffer instancedVertexBuffer;
        IndexBuffer instancedIndexBuffer;
        VertexDeclaration instancedVertexDeclaration;

        int originalVertexCount = 0;
        int originalIndexCount = 0;
        int vertexStride = 0;
        int maxInstances = 0;

        // Scratch array reused for every batch so Draw does not allocate.
        Matrix[] tempMatrices = new Matrix[maxShaderMatrices];

        Model originalModel;
        GraphicsDevice graphicsDevice;

        /// <summary>
        /// Builds the replicated vertex and index buffers for <paramref name="model"/>.
        /// </summary>
        /// <param name="graphics">Device that owns the new buffers and issues the draw calls.</param>
        /// <param name="model">Model whose first mesh part is instanced.</param>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        /// <exception cref="NotSupportedException">
        /// The mesh part has no vertices, has too many vertices for 16-bit indices,
        /// or does not use a 16-bit index buffer.
        /// </exception>
        public InstancedModel(GraphicsDevice graphics, Model model)
        {
            if (graphics == null)
                throw new ArgumentNullException("graphics");
            if (model == null)
                throw new ArgumentNullException("model");

            graphicsDevice = graphics;
            originalModel = model;

            SetupInstancedVertexData();
        }

        /// <summary>
        /// Reads the original geometry and creates the instanced vertex and index buffers.
        /// </summary>
        void SetupInstancedVertexData()
        {
            ModelMesh mesh = originalModel.Meshes[0];
            ModelMeshPart part = mesh.MeshParts[0];

            originalVertexCount = part.VertexBuffer.VertexCount;
            originalIndexCount = part.IndexBuffer.IndexCount;

            // Without these checks a zero vertex count divides by zero below, and a
            // count above ushort.MaxValue yields maxInstances == 0, which would make
            // the batching loop in Draw spin forever.
            if (originalVertexCount <= 0 || originalVertexCount > ushort.MaxValue)
                throw new NotSupportedException("The mesh part must contain between 1 and 65535 vertices.");

            // The index data is read and rewritten as ushort values.
            if (part.IndexBuffer.IndexElementSize != IndexElementSize.SixteenBits)
                throw new NotSupportedException("Only 16-bit index buffers are supported.");

            // Every replicated index must still fit in 16 bits, and every instance
            // needs its own bone matrix.
            int indexOverflowLimit = ushort.MaxValue / originalVertexCount;
            maxInstances = Math.Min(indexOverflowLimit, maxShaderMatrices);

            BuildInstancedVertexBuffer(part);
            BuildInstancedIndexBuffer(part);
        }

        /// <summary>
        /// Replicates the part's vertices once per instance, appending the instance
        /// number as bone indices and a weight of 1.0 for the first bone.
        /// </summary>
        void BuildInstancedVertexBuffer(ModelMeshPart part)
        {
            VertexDeclaration originalVertexDeclaration = part.VertexBuffer.VertexDeclaration;
            int oldVertexStride = originalVertexDeclaration.VertexStride;

            // Read the existing vertex data as raw bytes.
            byte[] oldVertexData = new byte[originalVertexCount * oldVertexStride];
            part.VertexBuffer.GetData(oldVertexData);

            // Each vertex grows by a Byte4 (BlendIndices) and a Vector4 (BlendWeight).
            vertexStride = oldVertexStride + boneIndicesSize + boneWeightsSize;

            // Temporary array holding the replicated vertex data (zero-initialised).
            byte[] newVertexData = new byte[originalVertexCount * vertexStride * maxInstances];

            byte[] fullWeight = BitConverter.GetBytes(1.0f);
            byte[] blendIndices = new byte[4];
            int outputPosition = 0;

            for (int instanceIndex = 0; instanceIndex < maxInstances; instanceIndex++)
            {
                // maxInstances never exceeds maxShaderMatrices, so the index fits in a byte.
                byte boneIndex = (byte)instanceIndex;
                blendIndices[0] = boneIndex;
                blendIndices[1] = boneIndex;
                blendIndices[2] = boneIndex;
                blendIndices[3] = boneIndex;

                int sourcePosition = 0;

                for (int i = 0; i < originalVertexCount; i++)
                {
                    // Copy over the existing data for this vertex.
                    Array.Copy(oldVertexData, sourcePosition,
                               newVertexData, outputPosition, oldVertexStride);
                    outputPosition += oldVertexStride;
                    sourcePosition += oldVertexStride;

                    // Bone indices: the instance number.
                    blendIndices.CopyTo(newVertexData, outputPosition);
                    outputPosition += boneIndicesSize;

                    // Bone weights: (1, 0, 0, 0). Only the first weight is written; the
                    // remaining three stay at the array's initial zero. Keeping them at
                    // zero means the instance is not scaled if the effect is configured
                    // to blend more than one weight per vertex.
                    fullWeight.CopyTo(newVertexData, outputPosition);
                    outputPosition += boneWeightsSize;
                }
            }

            // Extend the original declaration with the two new channels.
            VertexElement[] originalElements = originalVertexDeclaration.GetVertexElements();
            VertexElement[] elements = new VertexElement[originalElements.Length + 2];
            originalElements.CopyTo(elements, 0);
            elements[originalElements.Length] = new VertexElement(
                oldVertexStride, VertexElementFormat.Byte4, VertexElementUsage.BlendIndices, 0);
            elements[originalElements.Length + 1] = new VertexElement(
                oldVertexStride + boneIndicesSize, VertexElementFormat.Vector4, VertexElementUsage.BlendWeight, 0);

            instancedVertexDeclaration = new VertexDeclaration(elements);

            // The XNA 4 constructor takes a vertex COUNT, not a size in bytes.
            int instancedVertexCount = originalVertexCount * maxInstances;
            instancedVertexBuffer = new VertexBuffer(graphicsDevice, instancedVertexDeclaration, instancedVertexCount, BufferUsage.None);
            instancedVertexBuffer.SetData(newVertexData);
        }

        /// <summary>
        /// Replicates the part's indices once per instance, offsetting each copy so it
        /// addresses that instance's block of vertices.
        /// </summary>
        void BuildInstancedIndexBuffer(ModelMeshPart part)
        {
            ushort[] oldIndices = new ushort[originalIndexCount];
            part.IndexBuffer.GetData(oldIndices);

            // Temporary array holding the replicated index data.
            ushort[] newIndices = new ushort[originalIndexCount * maxInstances];

            int outputPosition = 0;

            for (int instanceIndex = 0; instanceIndex < maxInstances; instanceIndex++)
            {
                int instanceOffset = instanceIndex * originalVertexCount;

                for (int i = 0; i < originalIndexCount; i++)
                {
                    newIndices[outputPosition] = (ushort)(oldIndices[i] + instanceOffset);
                    outputPosition++;
                }
            }

            instancedIndexBuffer = new IndexBuffer(graphicsDevice, IndexElementSize.SixteenBits, newIndices.Length, BufferUsage.None);
            instancedIndexBuffer.SetData(newIndices);
        }

        /// <summary>
        /// Draws the first <paramref name="totalInstances"/> entries of
        /// <paramref name="transformMatrices"/>, batching them into as many draw calls
        /// as needed.
        /// </summary>
        /// <param name="transformMatrices">One world matrix per instance.</param>
        /// <param name="totalInstances">Number of matrices to draw.</param>
        /// <param name="skinnedEffect">
        /// Effect used for drawing. Its texture is replaced with the model's texture when
        /// the model's own effect is a <see cref="BasicEffect"/>, and its bone transforms
        /// are overwritten for every batch.
        /// </param>
        /// <exception cref="ArgumentNullException">An array or effect argument is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="totalInstances"/> is negative or larger than the matrix array.
        /// </exception>
        public void Draw(Matrix[] transformMatrices, int totalInstances, SkinnedEffect skinnedEffect)
        {
            if (transformMatrices == null)
                throw new ArgumentNullException("transformMatrices");
            if (skinnedEffect == null)
                throw new ArgumentNullException("skinnedEffect");
            if (totalInstances < 0 || totalInstances > transformMatrices.Length)
                throw new ArgumentOutOfRangeException("totalInstances");

            // Borrow the texture from the model's own effect when it has one we understand;
            // otherwise leave whatever texture the caller configured.
            BasicEffect sourceEffect = originalModel.Meshes[0].MeshParts[0].Effect as BasicEffect;
            if (sourceEffect != null)
            {
                skinnedEffect.Texture = sourceEffect.Texture;
            }

            graphicsDevice.SetVertexBuffer(instancedVertexBuffer);
            graphicsDevice.Indices = instancedIndexBuffer;

            for (int i = 0; i < totalInstances; i += maxInstances)
            {
                // How many instances can we fit into this batch?
                int instanceCount = Math.Min(totalInstances - i, maxInstances);

                // Upload this batch's transform matrices as the effect's bones.
                Array.Copy(transformMatrices, i, tempMatrices, 0, instanceCount);
                skinnedEffect.SetBoneTransforms(tempMatrices);

                foreach (EffectPass pass in skinnedEffect.CurrentTechnique.Passes)
                {
                    pass.Apply();

                    // The first instanceCount copies of the geometry are contiguous in
                    // both buffers, so a single call covers the whole batch.
                    graphicsDevice.DrawIndexedPrimitives(PrimitiveType.TriangleList, 0, 0, instanceCount * originalVertexCount, 0, instanceCount * originalIndexCount / 3);
                }
            }
        }
    }
}