¿Cómo realizo un filtro xBR o hqx en XNA?

11

Me gustaría ampliar mi juego con uno de los filtros hqx (hq2x, hq3x o hq4x) o un filtro xBR en un sombreador.

¿Cómo puedo hacer esto en XNA 4.0 y SM3?

Nota al margen: esta pregunta ha sido editada en gran medida para convertirse en lo que es ahora.

prueba
fuente
Pregunta interesante, tal vez el muestreo puntual con un filtro de proceso posterior FXAA crudo haría algo similar ... Solo una idea rápida, no lo he probado.
János Turánszki
en.wikipedia.org/wiki/Hqx explica aproximadamente cómo funcionan y tiene algunos enlaces a implementaciones.
Adam
1
Probablemente de interés github.com/pdjonov/hqnx
ClassicThunder
Obtuve una versión pirateada mediante el uso del hqxSharpproyecto, pero santa mierda es lenta (lo que advierte). Necesito algo que pueda mantener un framerate decente.
prueba
1
También pensé que CG era compatible con DirectX 9, que es en lo que se basa XNA. Intente compilar uno de los ejemplos en el enlace como si fuera un archivo HLSL. github.com/libretro/common-shaders/tree/master/hqx
ClassicThunder

Respuestas:

6

Puede reducir el recuento de instrucciones utilizando operaciones vectoriales: por ejemplo, en lugar de

edr = bool4((w1.x < w2.x) && ir_lv1.x, 
            (w1.y < w2.y) && ir_lv1.y, 
            (w1.z < w2.z) && ir_lv1.z, 
            (w1.w < w2.w) && ir_lv1.w);

puedes escribir

edr = (w1 < w2) && ir_lv1;

Los operadores en HLSL se pueden aplicar a vectores, incluso los lógicos, como &&dos bool3valores. Estos operadores realizarán la operación por componentes.

Código de sombreador

float2 texture_size;
float4x4 matrixTransform;

const static float coef = 2.0;
const static float3 yuv_weighted = float3(14.352, 28.176, 5.472);

sampler decal : register(s0);

float4 df(float4 A, float4 B)
{

    // begin optimization: reduction of 42 instruction slots
    float4 result = float4(A.x - B.x, A.y - B.y, A.z - B.z, A.w - B.w);

    return abs(result);
    // end optimization

    /* old code 

    //return float4(abs(A.x - B.x), abs(A.y - B.y), abs(A.z - B.z), abs(A.w - B.w));
    */
}

float4 weighted_distance(float4 a, float4 b, float4 c, float4 d, float4 e, float4 f, float4 g, float4 h)
{
    return (df(a, b) + df(a, c) + df(d, e) + df(d, f) + 4.0 * df(g, h));
}

float4 main_vertex(inout float2 texCoord : TEXCOORD0, inout float4 position : SV_Position) : TEXCOORD1
{
    float2 ps = float2(1.0 / texture_size.x, 1.0 / texture_size.y);
    float4 t1;

    t1.xy = float2(ps.x, 0); // F
    t1.zw = float2(0, ps.y); // H

    position = mul(position, matrixTransform);

    return t1;
}

/*    FRAGMENT SHADER    */
float4 main_fragment(float4 p : POSITION0, float2 tex0 : TEXCOORD0, float4 tex1 : TEXCOORD1) : COLOR0
{
    bool4 edr, edr_left, edr_up, px; // px = pixel, edr = edge detection rule
    bool4 ir_lv1, ir_lv2_left, ir_lv2_up;
    bool4 nc; // new_color
    bool4 fx, fx_left, fx_up; // inequations of straight lines.

    float2 fp = frac(tex0 * texture_size);
    float2 dx = tex1.xy;
    float2 dy = tex1.zw;

    float3 A = tex2D(decal, tex0 - dx - dy).xyz;
    float3 B = tex2D(decal, tex0 - dy).xyz;
    float3 C = tex2D(decal, tex0 + dx - dy).xyz;
    float3 D = tex2D(decal, tex0 - dx).xyz;
    float3 E = tex2D(decal, tex0).xyz;
    float3 F = tex2D(decal, tex0 + dx).xyz;
    float3 G = tex2D(decal, tex0 - dx + dy).xyz;
    float3 H = tex2D(decal, tex0 + dy).xyz;
    float3 I = tex2D(decal, tex0 + dx + dy).xyz;
    float3 A1 = tex2D(decal, tex0 - dx - 2.0*dy).xyz;
    float3 C1 = tex2D(decal, tex0 + dx - 2.0*dy).xyz;
    float3 A0 = tex2D(decal, tex0 - 2.0*dx - dy).xyz;
    float3 G0 = tex2D(decal, tex0 - 2.0*dx + dy).xyz;
    float3 C4 = tex2D(decal, tex0 + 2.0*dx - dy).xyz;
    float3 I4 = tex2D(decal, tex0 + 2.0*dx + dy).xyz;
    float3 G5 = tex2D(decal, tex0 - dx + 2.0*dy).xyz;
    float3 I5 = tex2D(decal, tex0 + dx + 2.0*dy).xyz;
    float3 B1 = tex2D(decal, tex0 - 2.0*dy).xyz;
    float3 D0 = tex2D(decal, tex0 - 2.0*dx).xyz;
    float3 H5 = tex2D(decal, tex0 + 2.0*dy).xyz;
    float3 F4 = tex2D(decal, tex0 + 2.0*dx).xyz;

    float4 b = mul(float4x3(B, D, H, F), yuv_weighted);
    float4 c = mul(float4x3(C, A, G, I), yuv_weighted);
    float4 e = mul(float4x3(E, E, E, E), yuv_weighted);
    float4 d = b.yzwx;
    float4 f = b.wxyz;
    float4 g = c.zwxy;
    float4 h = b.zwxy;
    float4 i = c.wxyz;

    float4 i4 = mul(float4x3(I4, C1, A0, G5), yuv_weighted);
    float4 i5 = mul(float4x3(I5, C4, A1, G0), yuv_weighted);
    float4 h5 = mul(float4x3(H5, F4, B1, D0), yuv_weighted);
    float4 f4 = h5.yzwx;

    float4 Ao = float4(1.0, -1.0, -1.0, 1.0);
    float4 Bo = float4(1.0, 1.0, -1.0, -1.0);
    float4 Co = float4(1.5, 0.5, -0.5, 0.5);
    float4 Ax = float4(1.0, -1.0, -1.0, 1.0);
    float4 Bx = float4(0.5, 2.0, -0.5, -2.0);
    float4 Cx = float4(1.0, 1.0, -0.5, 0.0);
    float4 Ay = float4(1.0, -1.0, -1.0, 1.0);
    float4 By = float4(2.0, 0.5, -2.0, -0.5);
    float4 Cy = float4(2.0, 0.0, -1.0, 0.5);

    // These inequations define the line below which interpolation occurs.
    fx.x = (Ao.x*fp.y + Bo.x*fp.x > Co.x);
    fx_left.x = (Ax.x*fp.y + Bx.x*fp.x > Cx.x);
    fx_up.x = (Ay.x*fp.y + By.x*fp.x > Cy.x);

    fx.y = (Ao.y*fp.y + Bo.y*fp.x > Co.y);
    fx_left.y = (Ax.y*fp.y + Bx.y*fp.x > Cx.y);
    fx_up.y = (Ay.y*fp.y + By.y*fp.x > Cy.y);

    fx.z = (Ao.z*fp.y + Bo.z*fp.x > Co.z);
    fx_left.z = (Ax.z*fp.y + Bx.z*fp.x > Cx.z);
    fx_up.z = (Ay.z*fp.y + By.z*fp.x > Cy.z);

    fx.w = (Ao.w*fp.y + Bo.w*fp.x > Co.w);
    fx_left.w = (Ax.w*fp.y + Bx.w*fp.x > Cx.w);
    fx_up.w = (Ay.w*fp.y + By.w*fp.x > Cy.w);

    //ir_lv1.x = ((e.x != f.x) && (e.x != h.x));
    //ir_lv1.y = ((e.y != f.y) && (e.y != h.y));
    //ir_lv1.z = ((e.z != f.z) && (e.z != h.z));
    //ir_lv1.w = ((e.w != f.w) && (e.w != h.w));
    ir_lv1 = ((e != f) && (e != h));

    //ir_lv2_left.x = ((e.x != g.x) && (d.x != g.x));
    //ir_lv2_left.y = ((e.y != g.y) && (d.y != g.y));
    //ir_lv2_left.z = ((e.z != g.z) && (d.z != g.z));
    //ir_lv2_left.w = ((e.w != g.w) && (d.w != g.w));
    ir_lv2_left = ((e != g) && (d != g));

    //ir_lv2_up.x = ((e.x != c.x) && (b.x != c.x));
    //ir_lv2_up.y = ((e.y != c.y) && (b.y != c.y));
    //ir_lv2_up.z = ((e.z != c.z) && (b.z != c.z));
    //ir_lv2_up.w = ((e.w != c.w) && (b.w != c.w));
    ir_lv2_up = ((e != c) && (b != c));

    float4 w1 = weighted_distance(e, c, g, i, h5, f4, h, f);
    float4 w2 = weighted_distance(h, d, i5, f, i4, b, e, i);

    // begin optimization: reduction of 6 instruction slots
    float4 df_fg = df(f, g);
    float4 df_hc = df(h, c);
    // end optimization

    float4 t1 = (coef * df_fg);
    float4 t2 = df_hc;
    float4 t3 = df_fg;
    float4 t4 = (coef * df_hc);

    //edr = bool4((w1.x < w2.x) && ir_lv1.x, 
    //            (w1.y < w2.y) && ir_lv1.y, 
    //            (w1.z < w2.z) && ir_lv1.z, 
    //            (w1.w < w2.w) && ir_lv1.w);
    edr = (w1 < w2) && ir_lv1;

    //edr_left = bool4((t1.x <= t2.x) && ir_lv2_left.x, 
    //                 (t1.y <= t2.y) && ir_lv2_left.y, 
    //                 (t1.z <= t2.z) && ir_lv2_left.z, 
    //                 (t1.w <= t2.w) && ir_lv2_left.w);
    edr_left = (t1 <= t2) && ir_lv2_left;

    //edr_up = bool4((t4.x <= t3.x) && ir_lv2_up.x, 
    //               (t4.y <= t3.y) && ir_lv2_up.y, 
    //               (t4.z <= t3.z) && ir_lv2_up.z, 
    //               (t4.w <= t3.w) && ir_lv2_up.w);
    edr_up = (t4 <= t3) && ir_lv2_up;

    //nc.x = (edr.x && (fx.x || edr_left.x && fx_left.x || edr_up.x && fx_up.x));
    //nc.y = (edr.y && (fx.y || edr_left.y && fx_left.y || edr_up.y && fx_up.y));
    //nc.z = (edr.z && (fx.z || edr_left.z && fx_left.z || edr_up.z && fx_up.z));
    //nc.w = (edr.w && (fx.w || edr_left.w && fx_left.w || edr_up.w && fx_up.w));
    nc = (edr && (fx || edr_left && fx_left || edr_up && fx_up));

    // to actually compile this shader, uncomment the following line
    // which reduces the instruction count to under 512
    //nc.zw = (float2)0;

    t1 = df(e, f);
    t2 = df(e, h);

    //px = bool4(t1.x <= t2.x, 
    //           t1.y <= t2.y, 
    //           t1.z <= t2.z, 
    //           t1.w <= t2.w);
    px = t1 <= t2;

    float3 res = nc.x ? px.x ? F : H : nc.y ? px.y ? B : F : nc.z ? px.z ? D : B : nc.w ? px.w ? H : D : E;

    return float4(res.x, res.y, res.z, 1.0);
}

technique mainTech
{
    pass mainPass
    {
        VertexShader = compile vs_3_0 main_vertex();
        PixelShader = compile ps_3_0 main_fragment();
    }
}

Imágenes

La imagen original de Redshrike se ha ampliado en un factor de 4.

  • Punto de muestreo

antes de

  • xBR

después

zogi
fuente
Ya usé esas optimizaciones en mi respuesta. Así fue como pude superar el error de ranura de instrucciones que estaba viendo.
prueba
Olvidalo entonces. Yo estaba un poco demasiado lento :)
zogi
ir_lv1 = ((e != f) && (e != h)); ir_lv2_left = ((e != g) && (d != g)); ir_lv2_up = ((e != c) && (b != c)); Esas son buenas optimizaciones que extrañé que encontraste, no terminé necesitándolas para mi problema porque pude reducir el recuento de instrucciones con otras optimizaciones.
prueba
Okay. Buen tema sin embargo. No he oído hablar de estos algoritmos antes de su pregunta. Encontré esta publicación de blog sobre hqx, que me ayudó a entender algo el algoritmo. Lo recomiendo mucho, si está interesado.
zogi
6

Tengo esto funcionando. No usa el filtro hqx, usa el filtro xBR (que prefiero). Para mí, esto no es un problema. Si necesita el filtro hqx, entonces querrá convertir los archivos .cg en su equivalente XNA apropiado.

Por razones completas y de búsqueda, editaré la pregunta para que sea más concisa y luego publicaré toda la información relevante para responder la pregunta aquí.


Paso 1: Configuración del código del juego

En primer lugar, lo más probable es que desee configurar un objetivo de renderizado donde dibuje su juego a escala 1: 1 y luego renderice el filtro.

using Microsoft.Xna.Framework;
using Microsoft.Xna.Framework.Graphics;

namespace xbr
{
    /// <summary>
    /// This is the main type for your game
    /// </summary>
    public class Game1 : Microsoft.Xna.Framework.Game
    {

        GraphicsDeviceManager graphics;
        SpriteBatch spriteBatch;
        RenderTarget2D renderTarget;
        Effect xbrEffect;
        Matrix projection;
        Matrix halfPixelOffset = Matrix.CreateTranslation(-0.5f, -0.5f, 0);
        Texture2D pretend240x160Scene;

        // the bounds of your 1:1 scene
        Rectangle renderBounds = new Rectangle(0, 0, 240, 160);

        // the bounds of your output scene (same w:h ratio)
        Rectangle outputBounds = new Rectangle(0, 0, 720, 480);

        public Game1()
        {
           base.Content.RootDirectory = "Content";

           this.graphics = new GraphicsDeviceManager(this);
           this.graphics.PreferredBackBufferWidth = outputBounds.Width;
           this.graphics.PreferredBackBufferHeight = outputBounds.Height;
        }

        /// <summary>
        /// Allows the game to perform any initialization it needs to before starting to run.
        /// This is where it can query for any required services and load any non-graphic
        /// related content.  Calling base.Initialize will enumerate through any components
        /// and initialize them as well.
        /// </summary>
        protected override void Initialize()
        {
            // TODO: Add your initialization logic here

            base.Initialize();
        }

        /// <summary>
        /// LoadContent will be called once per game and is the place to load
        /// all of your content.
        /// </summary>
        protected override void LoadContent()
        {
            // Create a new SpriteBatch, which can be used to draw textures.
            this.spriteBatch = new SpriteBatch(base.GraphicsDevice);
            this.xbrEffect = Content.Load<Effect>("xbr");

            // a fake scene that is a 240x160 image
            this.pretend240x160Scene = base.Content.Load<Texture2D>("240x160Scene");
            this.renderTarget = new RenderTarget2D(base.GraphicsDevice, this.renderBounds.Width, this.renderBounds.Height);

            // default vertex matrix for the vertex method
            this.projection = Matrix.CreateOrthographicOffCenter(0, this.outputBounds.Width, this.outputBounds.Height, 0, 0, 1);

            // set the values of this effect, should only have to do this once
            this.xbrEffect.Parameters["matrixTransform"].SetValue(halfPixelOffset * projection);
            this.xbrEffect.Parameters["textureSize"].SetValue(new float[] { renderBounds.Width, renderBounds.Height });
        }

        /// <summary>
        /// UnloadContent will be called once per game and is the place to unload
        /// all content.
        /// </summary>
        protected override void UnloadContent()
        {
        }

        /// <summary>
        /// Allows the game to run logic such as updating the world,
        /// checking for collisions, gathering input, and playing audio.
        /// </summary>
        /// <param name="gameTime">Provides a snapshot of timing values.</param>
        protected override void Update(GameTime gameTime)
        {
            base.Update(gameTime);
        }

        /// <summary>
        /// This is called when the game should draw itself.
        /// </summary>
        /// <param name="gameTime">Provides a snapshot of timing values.</param>
        protected override void Draw(GameTime gameTime)
        {
            base.GraphicsDevice.Clear(Color.CornflowerBlue);
            base.GraphicsDevice.SetRenderTarget(this.renderTarget);

            // draw your scene here scaled 1:1. for now I'll just draw
            // my fake 240x160 texture
            spriteBatch.Begin(SpriteSortMode.Deferred, BlendState.NonPremultiplied, 
                              SamplerState.PointClamp, null, null);

            spriteBatch.Draw(this.pretend240x160Scene, this.renderBounds, this.renderBounds, Color.White);

            spriteBatch.End();

            // now we'll draw to the back buffer
            base.GraphicsDevice.SetRenderTarget(null);

            // this renders the effect
            spriteBatch.Begin(SpriteSortMode.Immediate, BlendState.NonPremultiplied, 
                              SamplerState.PointClamp, null, null, this.xbrEffect);

            spriteBatch.Draw(this.renderTarget, this.outputBounds, this.renderBounds, Color.White);
            spriteBatch.End();

            base.Draw(gameTime);
        }
    }
}

Paso 2: archivo de efectos

El siguiente es el archivo de efectos compatible con XNA para realizar el filtro xBR.

// all identified optimizations have been amalgamated into this file
float2 textureSize;
float4x4 matrixTransform;

const static float coef = 2.0;
const static float3 yuv_weighted = float3(14.352, 28.176, 5.472);

sampler decal : register(s0);

float4 df(float4 A, float4 B)
{
    return abs(A - B);
}

float4 weighted_distance(float4 a, float4 b, float4 c, float4 d, 
                         float4 e, float4 f, float4 g, float4 h)
{
    return (df(a, b) + df(a, c) + df(d, e) + df(d, f) + 4.0 * df(g, h));
}

float4 main_vertex(inout float4 col0 : COLOR0, inout float2 tex0 : TEXCOORD0, 
                   inout float4 pos0 : POSITION0) : TEXCOORD1
{
    float2 ps = 1.0 / textureSize;

    pos0 = mul(pos0, matrixTransform);

    return float4(ps.x, 0, 0, ps.y);
}

float4 main_fragment(float4 pos0 : POSITION0, float2 tex0 : TEXCOORD0, 
                     float4 tex1 : TEXCOORD1) : COLOR0
{
    bool4 edr, edr_left, edr_up, px; // px = pixel, edr = edge detection rule
    bool4 ir_lv1, ir_lv2_left, ir_lv2_up;
    bool4 nc; // new_color
    bool4 fx, fx_left, fx_up; // inequations of straight lines.

    float2 fp = frac(tex0 * textureSize);
    float2 dx = tex1.xy;
    float2 dy = tex1.zw;

    float3 A  = tex2D(decal, tex0 - dx - dy).xyz;
    float3 B  = tex2D(decal, tex0 - dy).xyz;
    float3 C  = tex2D(decal, tex0 + dx - dy).xyz;
    float3 D  = tex2D(decal, tex0 - dx).xyz;
    float3 E  = tex2D(decal, tex0).xyz;
    float3 F  = tex2D(decal, tex0 + dx).xyz;
    float3 G  = tex2D(decal, tex0 - dx + dy).xyz;
    float3 H  = tex2D(decal, tex0 + dy).xyz;
    float3 I  = tex2D(decal, tex0 + dx + dy).xyz;
    float3 A1 = tex2D(decal, tex0 - dx - 2.0 * dy).xyz;
    float3 C1 = tex2D(decal, tex0 + dx - 2.0 * dy).xyz;
    float3 A0 = tex2D(decal, tex0 - 2.0 * dx - dy).xyz;
    float3 G0 = tex2D(decal, tex0 - 2.0 * dx + dy).xyz;
    float3 C4 = tex2D(decal, tex0 + 2.0 * dx - dy).xyz;
    float3 I4 = tex2D(decal, tex0 + 2.0 * dx + dy).xyz;
    float3 G5 = tex2D(decal, tex0 - dx + 2.0 * dy).xyz;
    float3 I5 = tex2D(decal, tex0 + dx + 2.0 * dy).xyz;
    float3 B1 = tex2D(decal, tex0 - 2.0 * dy).xyz;
    float3 D0 = tex2D(decal, tex0 - 2.0 * dx).xyz;
    float3 H5 = tex2D(decal, tex0 + 2.0 * dy).xyz;
    float3 F4 = tex2D(decal, tex0 + 2.0 * dx).xyz;

    float4 b = mul(float4x3(B, D, H, F), yuv_weighted);
    float4 c = mul(float4x3(C, A, G, I), yuv_weighted);
    float4 e = mul(float4x3(E, E, E, E), yuv_weighted);
    float4 d = b.yzwx;
    float4 f = b.wxyz;
    float4 g = c.zwxy;
    float4 h = b.zwxy;
    float4 i = c.wxyz;

    float4 i4 = mul(float4x3(I4, C1, A0, G5), yuv_weighted);
    float4 i5 = mul(float4x3(I5, C4, A1, G0), yuv_weighted);
    float4 h5 = mul(float4x3(H5, F4, B1, D0), yuv_weighted);
    float4 f4 = h5.yzwx;

    float4 Ao = float4(1.0, -1.0, -1.0, 1.0);
    float4 Bo = float4(1.0, 1.0, -1.0, -1.0);
    float4 Co = float4(1.5, 0.5, -0.5, 0.5);
    float4 Ax = float4(1.0, -1.0, -1.0, 1.0);
    float4 Bx = float4(0.5, 2.0, -0.5, -2.0);
    float4 Cx = float4(1.0, 1.0, -0.5, 0.0);
    float4 Ay = float4(1.0, -1.0, -1.0, 1.0);
    float4 By = float4(2.0, 0.5, -2.0, -0.5);
    float4 Cy = float4(2.0, 0.0, -1.0, 0.5);

    // These inequations define the line below which interpolation occurs.
    fx.x = (Ao.x * fp.y + Bo.x * fp.x > Co.x);
    fx.y = (Ao.y * fp.y + Bo.y * fp.x > Co.y);
    fx.z = (Ao.z * fp.y + Bo.z * fp.x > Co.z);
    fx.w = (Ao.w * fp.y + Bo.w * fp.x > Co.w);

    fx_left.x = (Ax.x * fp.y + Bx.x * fp.x > Cx.x);
    fx_left.y = (Ax.y * fp.y + Bx.y * fp.x > Cx.y);
    fx_left.z = (Ax.z * fp.y + Bx.z * fp.x > Cx.z);
    fx_left.w = (Ax.w * fp.y + Bx.w * fp.x > Cx.w);

    fx_up.x = (Ay.x * fp.y + By.x * fp.x > Cy.x);
    fx_up.y = (Ay.y * fp.y + By.y * fp.x > Cy.y);
    fx_up.z = (Ay.z * fp.y + By.z * fp.x > Cy.z);
    fx_up.w = (Ay.w * fp.y + By.w * fp.x > Cy.w);

    ir_lv1      = ((e != f) && (e != h));
    ir_lv2_left = ((e != g) && (d != g));
    ir_lv2_up   = ((e != c) && (b != c));

    float4 w1 = weighted_distance(e, c, g, i, h5, f4, h, f);
    float4 w2 = weighted_distance(h, d, i5, f, i4, b, e, i);
    float4 df_fg = df(f, g);
    float4 df_hc = df(h, c);
    float4 t1 = (coef * df_fg);
    float4 t2 = df_hc;
    float4 t3 = df_fg;
    float4 t4 = (coef * df_hc);

    edr      = (w1 < w2)  && ir_lv1;
    edr_left = (t1 <= t2) && ir_lv2_left;
    edr_up   = (t4 <= t3) && ir_lv2_up;

    nc = (edr && (fx || edr_left && fx_left || edr_up && fx_up));

    t1 = df(e, f);
    t2 = df(e, h);
    px = t1 <= t2;

    float3 res = nc.x ? px.x ? F : H : 
                 nc.y ? px.y ? B : F : 
                 nc.z ? px.z ? D : B : 
                 nc.w ? px.w ? H : D : E;

    return float4(res.xyz, 1.0);
}

technique T0
{
    pass P0
    {
        VertexShader = compile vs_3_0 main_vertex();
        PixelShader = compile ps_3_0 main_fragment();
    }
}

Resultados

La textura que utilicé para el render 240x160:

Entrada xBR

El resultado de ejecutar el juego:

Salida xBR

Fuentes

El archivo .cg que convertí en compatible con XNA vino de aquí . Entonces los créditos van a ellos por escribirlo.

prueba
fuente