Commit 364dbdf3b6c31a4a5fb7a6d479e7aafb4a7a10b6

Authored by Daniel Mack
Committed by Eric Miao
1 parent fe805986b2

video: add driver for PXA3xx 2D graphics accelerator

This adds a driver for the 2D graphics accelerator found on PXA3xx
processors. Only resource mapping, interrupt handling and a simple ioctl
handler are done by the kernel part; the rest of the logic is implemented
in DirectFB userspace.

Graphics applications greatly benefit from accelerated line drawing,
blending, and rectangle and triangle filling operations.

Benchmarks done on a PXA303 using the df_dok benchmarking tool follow,
where the value in square brackets shows the CPU usage during that test.

Without accelerator (benchmarking 256x252 on 480x262 RGB16 (16bit)):

  Anti-aliased Text                              3.016 secs (   65.649 KChars/sec) [ 99.6%]
  Fill Rectangle                                 3.021 secs (  175.107 MPixel/sec) [ 98.0%]
  Fill Rectangle (blend)                         3.582 secs (    3.602 MPixel/sec) [ 99.7%]
  Fill Rectangles [10]                           3.177 secs (  182.753 MPixel/sec) [ 98.1%]
  Fill Rectangles [10] (blend)                  18.020 secs (    3.580 MPixel/sec) [ 98.7%]
  Fill Spans                                     3.019 secs (  145.306 MPixel/sec) [ 98.0%]
  Fill Spans (blend)                             3.616 secs (    3.568 MPixel/sec) [ 99.4%]
  Blit                                           3.074 secs (   39.874 MPixel/sec) [ 98.0%]
  Blit 180                                       3.020 secs (   32.042 MPixel/sec) [ 98.0%]
  Blit with format conversion                    3.005 secs (   19.321 MPixel/sec) [ 99.6%]
  Blit from 32bit (blend)                        4.792 secs (    2.692 MPixel/sec) [ 98.7%]

With accelerator:

  Anti-aliased Text                              3.056 secs (*  36.518 KChars/sec) [ 21.3%]
  Fill Rectangle                                 3.015 secs (* 115.543 MPixel/sec) [  8.9%]
  Fill Rectangle (blend)                         3.180 secs (*  20.286 MPixel/sec) [  1.8%]
  Fill Rectangles [10]                           3.251 secs (* 119.062 MPixel/sec) [  1.2%]
  Fill Rectangles [10] (blend)                   6.293 secs (*  20.502 MPixel/sec) [  0.3%]
  Fill Spans                                     3.051 secs (*  97.264 MPixel/sec) [ 35.7%]
  Fill Spans (blend)                             3.377 secs (*  15.282 MPixel/sec) [ 17.8%]
  Blit                                           3.046 secs (*  27.533 MPixel/sec) [  2.6%]
  Blit 180                                       3.098 secs (*  27.070 MPixel/sec) [  2.2%]
  Blit with format conversion                    3.131 secs (*  39.148 MPixel/sec) [  2.8%]
  Blit from 32bit (blend)                        3.346 secs (*  11.568 MPixel/sec) [  0.8%]

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Tested-by: Sven Neumann <s.neumann@raumfeld.com>
Cc: Eric Miao <eric.y.miao@gmail.com>
Cc: Denis Oliver Kropp <dok@directfb.org>
Cc: Sven Neumann <s.neumann@raumfeld.com>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>

Showing 4 changed files with 821 additions and 0 deletions Side-by-side Diff

drivers/video/Kconfig
... ... @@ -1850,6 +1850,16 @@
1850 1850  
1851 1851 <file:Documentation/fb/pxafb.txt> describes the available parameters.
1852 1852  
  1853 +config PXA3XX_GCU
  1854 + tristate "PXA3xx 2D graphics accelerator driver"
  1855 + depends on FB_PXA
  1856 + help
  1857 + Kernelspace driver for the 2D graphics controller unit (GCU)
  1858 + found on PXA3xx processors. There is a counterpart driver in the
  1859 + DirectFB suite, see http://www.directfb.org/
  1860 +
  1861 + If you compile this as a module, it will be called pxa3xx_gcu.
  1862 +
1853 1863 config FB_MBX
1854 1864 tristate "2700G LCD framebuffer support"
1855 1865 depends on FB && ARCH_PXA
drivers/video/Makefile
... ... @@ -100,6 +100,7 @@
100 100 obj-$(CONFIG_FB_ASILIANT) += asiliantfb.o
101 101 obj-$(CONFIG_FB_PXA) += pxafb.o
102 102 obj-$(CONFIG_FB_PXA168) += pxa168fb.o
  103 +obj-$(CONFIG_PXA3XX_GCU) += pxa3xx-gcu.o
103 104 obj-$(CONFIG_FB_W100) += w100fb.o
104 105 obj-$(CONFIG_FB_TMIO) += tmiofb.o
105 106 obj-$(CONFIG_FB_AU1100) += au1100fb.o
drivers/video/pxa3xx-gcu.c
  1 +/*
  2 + * pxa3xx-gcu.c - Linux kernel module for PXA3xx graphics controllers
  3 + *
  4 + * This driver needs a DirectFB counterpart in user space, communication
  5 + * is handled via mmap()ed memory areas and an ioctl.
  6 + *
  7 + * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
  8 + * Copyright (c) 2009 Janine Kropp <nin@directfb.org>
  9 + * Copyright (c) 2009 Denis Oliver Kropp <dok@directfb.org>
  10 + *
  11 + * This program is free software; you can redistribute it and/or modify
  12 + * it under the terms of the GNU General Public License as published by
  13 + * the Free Software Foundation; either version 2 of the License, or
  14 + * (at your option) any later version.
  15 + *
  16 + * This program is distributed in the hope that it will be useful,
  17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19 + * GNU General Public License for more details.
  20 + *
  21 + * You should have received a copy of the GNU General Public License
  22 + * along with this program; if not, write to the Free Software
  23 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 + */
  25 +
  26 +/*
  27 + * WARNING: This controller is attached to System Bus 2 of the PXA which
  28 + * needs its arbiter to be enabled explicitly (CKENB & 1<<9).
  29 + * There is currently no way to do this from Linux, so you need to teach
  30 + * your bootloader for now.
  31 + */
  32 +
  33 +#include <linux/module.h>
  34 +#include <linux/version.h>
  35 +
  36 +#include <linux/platform_device.h>
  37 +#include <linux/dma-mapping.h>
  38 +#include <linux/miscdevice.h>
  39 +#include <linux/interrupt.h>
  40 +#include <linux/spinlock.h>
  41 +#include <linux/uaccess.h>
  42 +#include <linux/ioctl.h>
  43 +#include <linux/delay.h>
  44 +#include <linux/sched.h>
  45 +#include <linux/slab.h>
  46 +#include <linux/clk.h>
  47 +#include <linux/fs.h>
  48 +#include <linux/io.h>
  49 +
  50 +#include "pxa3xx-gcu.h"
  51 +
  52 +#define DRV_NAME "pxa3xx-gcu"
  53 +#define MISCDEV_MINOR 197
  54 +
  55 +#define REG_GCCR 0x00
  56 +#define GCCR_SYNC_CLR (1 << 9)
  57 +#define GCCR_BP_RST (1 << 8)
  58 +#define GCCR_ABORT (1 << 6)
  59 +#define GCCR_STOP (1 << 4)
  60 +
  61 +#define REG_GCISCR 0x04
  62 +#define REG_GCIECR 0x08
  63 +#define REG_GCRBBR 0x20
  64 +#define REG_GCRBLR 0x24
  65 +#define REG_GCRBHR 0x28
  66 +#define REG_GCRBTR 0x2C
  67 +#define REG_GCRBEXHR 0x30
  68 +
  69 +#define IE_EOB (1 << 0)
  70 +#define IE_EEOB (1 << 5)
  71 +#define IE_ALL 0xff
  72 +
  73 +#define SHARED_SIZE PAGE_ALIGN(sizeof(struct pxa3xx_gcu_shared))
  74 +
  75 +/* #define PXA3XX_GCU_DEBUG */
  76 +/* #define PXA3XX_GCU_DEBUG_TIMER */
  77 +
/*
 * Logging helpers built on top of QPRINT().
 *
 * Both macros reference a variable named `priv` that must be in scope at
 * the call site (a struct pxa3xx_gcu_priv pointer); it is not passed in
 * as an argument.
 *
 * QDUMP()  - state trace at KERN_DEBUG; compiled out entirely unless
 *            PXA3XX_GCU_DEBUG is defined.
 * QERROR() - state trace at KERN_ERR; always compiled in.
 */
#ifdef PXA3XX_GCU_DEBUG
#define QDUMP(msg)					\
	do {						\
		QPRINT(priv, KERN_DEBUG, msg);		\
	} while (0)
#else
#define QDUMP(msg)	do {} while (0)
#endif

#define QERROR(msg)					\
	do {						\
		QPRINT(priv, KERN_ERR, msg);		\
	} while (0)
  91 +
/*
 * One batch buffer of GCU commands.
 *
 * Batches are kept on singly linked lists (free / ready / running) that
 * hang off struct pxa3xx_gcu_priv and are chained through @next.
 */
struct pxa3xx_gcu_batch {
	struct pxa3xx_gcu_batch *next;	/* next batch on the same list */
	u32			*ptr;	/* CPU address of the coherent DMA buffer */
	dma_addr_t		 phys;	/* bus address handed to the hardware */
	unsigned long		 length; /* number of 32-bit words copied in by write() */
};
  98 +
/*
 * Per-device driver state, allocated in probe and freed in remove.
 */
struct pxa3xx_gcu_priv {
	void __iomem		 *mmio_base;	/* ioremap()ed register window */
	struct clk		 *clk;		/* clock obtained with clk_get() */
	struct pxa3xx_gcu_shared *shared;	/* coherent area also mmap()ed to userspace */
	dma_addr_t		  shared_phys;	/* bus address of @shared */
	struct resource		 *resource_mem;	/* MMIO resource, used for the register mmap */
	struct miscdevice	  misc_dev;
	/* embedded (not static) so the file ops can find priv via container_of() */
	struct file_operations	  misc_fops;
	wait_queue_head_t	  wait_idle;	/* woken when the hardware goes idle */
	wait_queue_head_t	  wait_free;	/* woken when batches return to @free */
	spinlock_t		  spinlock;	/* taken around batch list updates and reset */
	struct timeval		  base_time;	/* set on reset; QPRINT timestamps are relative to it */

	struct pxa3xx_gcu_batch *free;		/* batches available to write() */

	struct pxa3xx_gcu_batch *ready;		/* head of batches queued for execution */
	struct pxa3xx_gcu_batch *ready_last;	/* tail of the @ready list */
	struct pxa3xx_gcu_batch *running;	/* batches currently handed to the hardware */
};
  118 +
  119 +static inline unsigned long
  120 +gc_readl(struct pxa3xx_gcu_priv *priv, unsigned int off)
  121 +{
  122 + return __raw_readl(priv->mmio_base + off);
  123 +}
  124 +
  125 +static inline void
  126 +gc_writel(struct pxa3xx_gcu_priv *priv, unsigned int off, unsigned long val)
  127 +{
  128 + __raw_writel(val, priv->mmio_base + off);
  129 +}
  130 +
/*
 * QPRINT() - print a one-line snapshot of the ring buffer state.
 *
 * @priv:  struct pxa3xx_gcu_priv pointer (evaluated several times)
 * @level: printk level; must be a string literal such as KERN_DEBUG
 *         because it is pasted in front of the format string
 * @msg:   short text printed with "%s"
 *
 * The line contains the time elapsed since priv->base_time (seconds plus
 * the current microsecond value split into two three-digit groups), the
 * calling function name, @msg, the software hw_running flag, the
 * interrupt status register, the ring base and length registers, and the
 * EXHR/HR/TR registers expressed as 32-bit word offsets from the ring
 * base.
 *
 * The macro declares its own locals named `tv`, `shared` and `base`
 * inside a do/while block and performs live register reads.
 */
#define QPRINT(priv, level, msg)					\
	do {								\
		struct timeval tv;					\
		struct pxa3xx_gcu_shared *shared = priv->shared;	\
		u32 base = gc_readl(priv, REG_GCRBBR);			\
									\
		do_gettimeofday(&tv);					\
									\
		printk(level "%ld.%03ld.%03ld - %-17s: %-21s (%s, "	\
			"STATUS "					\
			"0x%02lx, B 0x%08lx [%ld], E %5ld, H %5ld, "	\
			"T %5ld)\n",					\
			tv.tv_sec - priv->base_time.tv_sec,		\
			tv.tv_usec / 1000, tv.tv_usec % 1000,		\
			__func__, msg,					\
			shared->hw_running ? "running" : "   idle",	\
			gc_readl(priv, REG_GCISCR),			\
			gc_readl(priv, REG_GCRBBR),			\
			gc_readl(priv, REG_GCRBLR),			\
			(gc_readl(priv, REG_GCRBEXHR) - base) / 4,	\
			(gc_readl(priv, REG_GCRBHR) - base) / 4,	\
			(gc_readl(priv, REG_GCRBTR) - base) / 4);	\
	} while (0)
  154 +
/*
 * pxa3xx_gcu_reset() - abort the hardware and reinitialise the shared area.
 *
 * Disables all GCU interrupts, pulses the ABORT bit in the control
 * register, wipes the whole shared area (ring buffer and statistics) and
 * re-seeds its buffer_phys and magic fields, restarts the log time base,
 * points the ring base and tail at the start of the shared buffer with a
 * zero ring length, and finally re-enables every interrupt except EOB.
 *
 * The batch lists in @priv (free/ready/running) are not touched here.
 * The ioctl path calls this with priv->spinlock held; probe calls it
 * without the lock.
 */
static void
pxa3xx_gcu_reset(struct pxa3xx_gcu_priv *priv)
{
	QDUMP("RESET");

	/* disable interrupts */
	gc_writel(priv, REG_GCIECR, 0);

	/* reset hardware */
	gc_writel(priv, REG_GCCR, GCCR_ABORT);
	gc_writel(priv, REG_GCCR, 0);

	/* clears hw_running and all counters along with the ring contents */
	memset(priv->shared, 0, SHARED_SIZE);
	priv->shared->buffer_phys = priv->shared_phys;
	priv->shared->magic = PXA3XX_GCU_SHARED_MAGIC;

	do_gettimeofday(&priv->base_time);

	/* set up the ring buffer pointers */
	gc_writel(priv, REG_GCRBLR, 0);
	gc_writel(priv, REG_GCRBBR, priv->shared_phys);
	gc_writel(priv, REG_GCRBTR, priv->shared_phys);

	/* enable all IRQs except EOB */
	gc_writel(priv, REG_GCIECR, IE_ALL & ~IE_EOB);
}
  181 +
  182 +static void
  183 +dump_whole_state(struct pxa3xx_gcu_priv *priv)
  184 +{
  185 + struct pxa3xx_gcu_shared *sh = priv->shared;
  186 + u32 base = gc_readl(priv, REG_GCRBBR);
  187 +
  188 + QDUMP("DUMP");
  189 +
  190 + printk(KERN_DEBUG "== PXA3XX-GCU DUMP ==\n"
  191 + "%s, STATUS 0x%02lx, B 0x%08lx [%ld], E %5ld, H %5ld, T %5ld\n",
  192 + sh->hw_running ? "running" : "idle ",
  193 + gc_readl(priv, REG_GCISCR),
  194 + gc_readl(priv, REG_GCRBBR),
  195 + gc_readl(priv, REG_GCRBLR),
  196 + (gc_readl(priv, REG_GCRBEXHR) - base) / 4,
  197 + (gc_readl(priv, REG_GCRBHR) - base) / 4,
  198 + (gc_readl(priv, REG_GCRBTR) - base) / 4);
  199 +}
  200 +
  201 +static void
  202 +flush_running(struct pxa3xx_gcu_priv *priv)
  203 +{
  204 + struct pxa3xx_gcu_batch *running = priv->running;
  205 + struct pxa3xx_gcu_batch *next;
  206 +
  207 + while (running) {
  208 + next = running->next;
  209 + running->next = priv->free;
  210 + priv->free = running;
  211 + running = next;
  212 + }
  213 +
  214 + priv->running = NULL;
  215 +}
  216 +
/*
 * run_ready() - hand all queued batches to the hardware.
 *
 * Builds a fresh command sequence at the start of the shared ring buffer
 * that references every batch on the ready list by its bus address, moves
 * the ready list to the running list, marks the hardware as running and
 * programs the ring base, tail and length registers.
 *
 * The ready list must not be empty (enforced with BUG_ON). Both callers
 * in this file invoke it with priv->spinlock held.
 */
static void
run_ready(struct pxa3xx_gcu_priv *priv)
{
	unsigned int num = 0;
	struct pxa3xx_gcu_shared *shared = priv->shared;
	struct pxa3xx_gcu_batch *ready = priv->ready;

	QDUMP("Start");

	BUG_ON(!ready);

	/* NOTE(review): 0x05000000 brackets the sequence; opcode meaning not
	 * documented here - confirm against the GCU manual */
	shared->buffer[num++] = 0x05000000;

	/* two words per batch: a command word followed by the batch's bus
	 * address (presumably a "jump to batch buffer" command - TODO confirm) */
	while (ready) {
		shared->buffer[num++] = 0x00000001;
		shared->buffer[num++] = ready->phys;
		ready = ready->next;
	}

	shared->buffer[num++] = 0x05000000;
	priv->running = priv->ready;
	priv->ready = priv->ready_last = NULL;
	/* zero the ring length before reprogramming base and tail */
	gc_writel(priv, REG_GCRBLR, 0);
	shared->hw_running = 1;

	/* ring base address */
	gc_writel(priv, REG_GCRBBR, shared->buffer_phys);

	/* ring tail address */
	gc_writel(priv, REG_GCRBTR, shared->buffer_phys + num * 4);

	/* ring length: word count rounded up to a multiple of 64, in bytes */
	gc_writel(priv, REG_GCRBLR, ((num + 63) & ~63) * 4);
}
  251 +
/*
 * pxa3xx_gcu_handle_irq() - GCU interrupt handler.
 *
 * @irq: interrupt number (unused)
 * @ctx: the struct pxa3xx_gcu_priv registered with request_irq()
 *
 * Returns IRQ_NONE when none of the IE_ALL status bits are set, otherwise
 * IRQ_HANDLED.
 *
 * On EEOB the batches that were running are returned to the free list and
 * waiters on wait_free are woken. If more batches are queued they are
 * started immediately; otherwise the hardware is marked idle, the ring
 * length is zeroed and waiters on wait_idle are woken. Any other status
 * bit is logged as an error together with a register dump.
 */
static irqreturn_t
pxa3xx_gcu_handle_irq(int irq, void *ctx)
{
	struct pxa3xx_gcu_priv *priv = ctx;
	struct pxa3xx_gcu_shared *shared = priv->shared;
	/* status is sampled once, before the lock is taken */
	u32 status = gc_readl(priv, REG_GCISCR) & IE_ALL;

	QDUMP("-Interrupt");

	if (!status)
		return IRQ_NONE;

	spin_lock(&priv->spinlock);
	shared->num_interrupts++;

	if (status & IE_EEOB) {
		QDUMP(" [EEOB]");

		flush_running(priv);
		wake_up_all(&priv->wait_free);

		if (priv->ready) {
			run_ready(priv);
		} else {
			/* There is no more data prepared by the userspace.
			 * Set hw_running = 0 and wait for the next userspace
			 * kick-off */
			shared->num_idle++;
			shared->hw_running = 0;

			QDUMP(" '-> Idle.");

			/* set ring buffer length to zero */
			gc_writel(priv, REG_GCRBLR, 0);

			wake_up_all(&priv->wait_idle);
		}

		shared->num_done++;
	} else {
		QERROR(" [???]");
		dump_whole_state(priv);
	}

	/* Clear the interrupt */
	/* NOTE(review): this acknowledge happens after run_ready() may have
	 * restarted the hardware - confirm that an EEOB raised by the new run
	 * cannot be cleared by this write. */
	gc_writel(priv, REG_GCISCR, status);
	spin_unlock(&priv->spinlock);

	return IRQ_HANDLED;
}
  302 +
  303 +static int
  304 +pxa3xx_gcu_wait_idle(struct pxa3xx_gcu_priv *priv)
  305 +{
  306 + int ret = 0;
  307 +
  308 + QDUMP("Waiting for idle...");
  309 +
  310 + /* Does not need to be atomic. There's a lock in user space,
  311 + * but anyhow, this is just for statistics. */
  312 + priv->shared->num_wait_idle++;
  313 +
  314 + while (priv->shared->hw_running) {
  315 + int num = priv->shared->num_interrupts;
  316 + u32 rbexhr = gc_readl(priv, REG_GCRBEXHR);
  317 +
  318 + ret = wait_event_interruptible_timeout(priv->wait_idle,
  319 + !priv->shared->hw_running, HZ*4);
  320 +
  321 + if (ret < 0)
  322 + break;
  323 +
  324 + if (ret > 0)
  325 + continue;
  326 +
  327 + if (gc_readl(priv, REG_GCRBEXHR) == rbexhr &&
  328 + priv->shared->num_interrupts == num) {
  329 + QERROR("TIMEOUT");
  330 + ret = -ETIMEDOUT;
  331 + break;
  332 + }
  333 + }
  334 +
  335 + QDUMP("done");
  336 +
  337 + return ret;
  338 +}
  339 +
  340 +static int
  341 +pxa3xx_gcu_wait_free(struct pxa3xx_gcu_priv *priv)
  342 +{
  343 + int ret = 0;
  344 +
  345 + QDUMP("Waiting for free...");
  346 +
  347 + /* Does not need to be atomic. There's a lock in user space,
  348 + * but anyhow, this is just for statistics. */
  349 + priv->shared->num_wait_free++;
  350 +
  351 + while (!priv->free) {
  352 + u32 rbexhr = gc_readl(priv, REG_GCRBEXHR);
  353 +
  354 + ret = wait_event_interruptible_timeout(priv->wait_free,
  355 + priv->free, HZ*4);
  356 +
  357 + if (ret < 0)
  358 + break;
  359 +
  360 + if (ret > 0)
  361 + continue;
  362 +
  363 + if (gc_readl(priv, REG_GCRBEXHR) == rbexhr) {
  364 + QERROR("TIMEOUT");
  365 + ret = -ETIMEDOUT;
  366 + break;
  367 + }
  368 + }
  369 +
  370 + QDUMP("done");
  371 +
  372 + return ret;
  373 +}
  374 +
  375 +/* Misc device layer */
  376 +
  377 +static ssize_t
  378 +pxa3xx_gcu_misc_write(struct file *filp, const char *buff,
  379 + size_t count, loff_t *offp)
  380 +{
  381 + int ret;
  382 + unsigned long flags;
  383 + struct pxa3xx_gcu_batch *buffer;
  384 + struct pxa3xx_gcu_priv *priv =
  385 + container_of(filp->f_op, struct pxa3xx_gcu_priv, misc_fops);
  386 +
  387 + int words = count / 4;
  388 +
  389 + /* Does not need to be atomic. There's a lock in user space,
  390 + * but anyhow, this is just for statistics. */
  391 + priv->shared->num_writes++;
  392 +
  393 + priv->shared->num_words += words;
  394 +
  395 + /* Last word reserved for batch buffer end command */
  396 + if (words >= PXA3XX_GCU_BATCH_WORDS)
  397 + return -E2BIG;
  398 +
  399 + /* Wait for a free buffer */
  400 + if (!priv->free) {
  401 + ret = pxa3xx_gcu_wait_free(priv);
  402 + if (ret < 0)
  403 + return ret;
  404 + }
  405 +
  406 + /*
  407 + * Get buffer from free list
  408 + */
  409 + spin_lock_irqsave(&priv->spinlock, flags);
  410 +
  411 + buffer = priv->free;
  412 + priv->free = buffer->next;
  413 +
  414 + spin_unlock_irqrestore(&priv->spinlock, flags);
  415 +
  416 +
  417 + /* Copy data from user into buffer */
  418 + ret = copy_from_user(buffer->ptr, buff, words * 4);
  419 + if (ret) {
  420 + spin_lock_irqsave(&priv->spinlock, flags);
  421 + buffer->next = priv->free;
  422 + priv->free = buffer;
  423 + spin_unlock_irqrestore(&priv->spinlock, flags);
  424 + return ret;
  425 + }
  426 +
  427 + buffer->length = words;
  428 +
  429 + /* Append batch buffer end command */
  430 + buffer->ptr[words] = 0x01000000;
  431 +
  432 + /*
  433 + * Add buffer to ready list
  434 + */
  435 + spin_lock_irqsave(&priv->spinlock, flags);
  436 +
  437 + buffer->next = NULL;
  438 +
  439 + if (priv->ready) {
  440 + BUG_ON(priv->ready_last == NULL);
  441 +
  442 + priv->ready_last->next = buffer;
  443 + } else
  444 + priv->ready = buffer;
  445 +
  446 + priv->ready_last = buffer;
  447 +
  448 + if (!priv->shared->hw_running)
  449 + run_ready(priv);
  450 +
  451 + spin_unlock_irqrestore(&priv->spinlock, flags);
  452 +
  453 + return words * 4;
  454 +}
  455 +
  456 +
  457 +static long
  458 +pxa3xx_gcu_misc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
  459 +{
  460 + unsigned long flags;
  461 + struct pxa3xx_gcu_priv *priv =
  462 + container_of(filp->f_op, struct pxa3xx_gcu_priv, misc_fops);
  463 +
  464 + switch (cmd) {
  465 + case PXA3XX_GCU_IOCTL_RESET:
  466 + spin_lock_irqsave(&priv->spinlock, flags);
  467 + pxa3xx_gcu_reset(priv);
  468 + spin_unlock_irqrestore(&priv->spinlock, flags);
  469 + return 0;
  470 +
  471 + case PXA3XX_GCU_IOCTL_WAIT_IDLE:
  472 + return pxa3xx_gcu_wait_idle(priv);
  473 + }
  474 +
  475 + return -ENOSYS;
  476 +}
  477 +
  478 +static int
  479 +pxa3xx_gcu_misc_mmap(struct file *filp, struct vm_area_struct *vma)
  480 +{
  481 + unsigned int size = vma->vm_end - vma->vm_start;
  482 + struct pxa3xx_gcu_priv *priv =
  483 + container_of(filp->f_op, struct pxa3xx_gcu_priv, misc_fops);
  484 +
  485 + switch (vma->vm_pgoff) {
  486 + case 0:
  487 + /* hand out the shared data area */
  488 + if (size != SHARED_SIZE)
  489 + return -EINVAL;
  490 +
  491 + return dma_mmap_coherent(NULL, vma,
  492 + priv->shared, priv->shared_phys, size);
  493 +
  494 + case SHARED_SIZE >> PAGE_SHIFT:
  495 + /* hand out the MMIO base for direct register access
  496 + * from userspace */
  497 + if (size != resource_size(priv->resource_mem))
  498 + return -EINVAL;
  499 +
  500 + vma->vm_flags |= VM_IO;
  501 + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  502 +
  503 + return io_remap_pfn_range(vma, vma->vm_start,
  504 + priv->resource_mem->start >> PAGE_SHIFT,
  505 + size, vma->vm_page_prot);
  506 + }
  507 +
  508 + return -EINVAL;
  509 +}
  510 +
  511 +
/*
 * Optional periodic state dump, only built when PXA3XX_GCU_DEBUG_TIMER is
 * defined. Without it pxa3xx_gcu_init_debug_timer() is an empty inline.
 */
#ifdef PXA3XX_GCU_DEBUG_TIMER
static struct timer_list pxa3xx_gcu_debug_timer;

/*
 * Timer callback: logs the current state via QERROR and re-arms itself.
 * @ptr is interpreted as a struct pxa3xx_gcu_priv pointer.
 */
static void pxa3xx_gcu_debug_timedout(unsigned long ptr)
{
	struct pxa3xx_gcu_priv *priv = (struct pxa3xx_gcu_priv *) ptr;

	QERROR("Timer DUMP");

	/* init the timer structure */
	init_timer(&pxa3xx_gcu_debug_timer);
	pxa3xx_gcu_debug_timer.function = pxa3xx_gcu_debug_timedout;
	pxa3xx_gcu_debug_timer.data = ptr;
	pxa3xx_gcu_debug_timer.expires = jiffies + 5*HZ; /* fire again in five seconds */

	add_timer(&pxa3xx_gcu_debug_timer);
}

/*
 * Starts the periodic dump by invoking the callback once directly.
 *
 * NOTE(review): the value passed here is the address of the timer itself,
 * but the callback casts its argument to struct pxa3xx_gcu_priv * and
 * dereferences it through QERROR(). This looks wrong; the function
 * probably needs to receive the real priv pointer from probe - confirm
 * and fix together with the call site.
 */
static void pxa3xx_gcu_init_debug_timer(void)
{
	pxa3xx_gcu_debug_timedout((unsigned long) &pxa3xx_gcu_debug_timer);
}
#else
static inline void pxa3xx_gcu_init_debug_timer(void) {}
#endif
  537 +
  538 +static int
  539 +add_buffer(struct platform_device *dev,
  540 + struct pxa3xx_gcu_priv *priv)
  541 +{
  542 + struct pxa3xx_gcu_batch *buffer;
  543 +
  544 + buffer = kzalloc(sizeof(struct pxa3xx_gcu_batch), GFP_KERNEL);
  545 + if (!buffer)
  546 + return -ENOMEM;
  547 +
  548 + buffer->ptr = dma_alloc_coherent(&dev->dev, PXA3XX_GCU_BATCH_WORDS * 4,
  549 + &buffer->phys, GFP_KERNEL);
  550 + if (!buffer->ptr) {
  551 + kfree(buffer);
  552 + return -ENOMEM;
  553 + }
  554 +
  555 + buffer->next = priv->free;
  556 +
  557 + priv->free = buffer;
  558 +
  559 + return 0;
  560 +}
  561 +
  562 +static void
  563 +free_buffers(struct platform_device *dev,
  564 + struct pxa3xx_gcu_priv *priv)
  565 +{
  566 + struct pxa3xx_gcu_batch *next, *buffer = priv->free;
  567 +
  568 + while (buffer) {
  569 + next = buffer->next;
  570 +
  571 + dma_free_coherent(&dev->dev, PXA3XX_GCU_BATCH_WORDS * 4,
  572 + buffer->ptr, buffer->phys);
  573 +
  574 + kfree(buffer);
  575 +
  576 + buffer = next;
  577 + }
  578 +
  579 + priv->free = NULL;
  580 +}
  581 +
  582 +static int __devinit
  583 +pxa3xx_gcu_probe(struct platform_device *dev)
  584 +{
  585 + int i, ret, irq;
  586 + struct resource *r;
  587 + struct pxa3xx_gcu_priv *priv;
  588 +
  589 + priv = kzalloc(sizeof(struct pxa3xx_gcu_priv), GFP_KERNEL);
  590 + if (!priv)
  591 + return -ENOMEM;
  592 +
  593 + for (i = 0; i < 8; i++) {
  594 + ret = add_buffer(dev, priv);
  595 + if (ret) {
  596 + dev_err(&dev->dev, "failed to allocate DMA memory\n");
  597 + goto err_free_priv;
  598 + }
  599 + }
  600 +
  601 + init_waitqueue_head(&priv->wait_idle);
  602 + init_waitqueue_head(&priv->wait_free);
  603 + spin_lock_init(&priv->spinlock);
  604 +
  605 + /* we allocate the misc device structure as part of our own allocation,
  606 + * so we can get a pointer to our priv structure later on with
  607 + * container_of(). This isn't really necessary as we have a fixed minor
  608 + * number anyway, but this is to avoid statics. */
  609 +
  610 + priv->misc_fops.owner = THIS_MODULE;
  611 + priv->misc_fops.write = pxa3xx_gcu_misc_write;
  612 + priv->misc_fops.unlocked_ioctl = pxa3xx_gcu_misc_ioctl;
  613 + priv->misc_fops.mmap = pxa3xx_gcu_misc_mmap;
  614 +
  615 + priv->misc_dev.minor = MISCDEV_MINOR,
  616 + priv->misc_dev.name = DRV_NAME,
  617 + priv->misc_dev.fops = &priv->misc_fops,
  618 +
  619 + /* register misc device */
  620 + ret = misc_register(&priv->misc_dev);
  621 + if (ret < 0) {
  622 + dev_err(&dev->dev, "misc_register() for minor %d failed\n",
  623 + MISCDEV_MINOR);
  624 + goto err_free_priv;
  625 + }
  626 +
  627 + /* handle IO resources */
  628 + r = platform_get_resource(dev, IORESOURCE_MEM, 0);
  629 + if (r == NULL) {
  630 + dev_err(&dev->dev, "no I/O memory resource defined\n");
  631 + ret = -ENODEV;
  632 + goto err_misc_deregister;
  633 + }
  634 +
  635 + if (!request_mem_region(r->start, resource_size(r), dev->name)) {
  636 + dev_err(&dev->dev, "failed to request I/O memory\n");
  637 + ret = -EBUSY;
  638 + goto err_misc_deregister;
  639 + }
  640 +
  641 + priv->mmio_base = ioremap_nocache(r->start, resource_size(r));
  642 + if (!priv->mmio_base) {
  643 + dev_err(&dev->dev, "failed to map I/O memory\n");
  644 + ret = -EBUSY;
  645 + goto err_free_mem_region;
  646 + }
  647 +
  648 + /* allocate dma memory */
  649 + priv->shared = dma_alloc_coherent(&dev->dev, SHARED_SIZE,
  650 + &priv->shared_phys, GFP_KERNEL);
  651 +
  652 + if (!priv->shared) {
  653 + dev_err(&dev->dev, "failed to allocate DMA memory\n");
  654 + ret = -ENOMEM;
  655 + goto err_free_io;
  656 + }
  657 +
  658 + /* enable the clock */
  659 + priv->clk = clk_get(&dev->dev, NULL);
  660 + if (IS_ERR(priv->clk)) {
  661 + dev_err(&dev->dev, "failed to get clock\n");
  662 + ret = -ENODEV;
  663 + goto err_free_dma;
  664 + }
  665 +
  666 + ret = clk_enable(priv->clk);
  667 + if (ret < 0) {
  668 + dev_err(&dev->dev, "failed to enable clock\n");
  669 + goto err_put_clk;
  670 + }
  671 +
  672 + /* request the IRQ */
  673 + irq = platform_get_irq(dev, 0);
  674 + if (irq < 0) {
  675 + dev_err(&dev->dev, "no IRQ defined\n");
  676 + ret = -ENODEV;
  677 + goto err_put_clk;
  678 + }
  679 +
  680 + ret = request_irq(irq, pxa3xx_gcu_handle_irq,
  681 + IRQF_DISABLED, DRV_NAME, priv);
  682 + if (ret) {
  683 + dev_err(&dev->dev, "request_irq failed\n");
  684 + ret = -EBUSY;
  685 + goto err_put_clk;
  686 + }
  687 +
  688 + platform_set_drvdata(dev, priv);
  689 + priv->resource_mem = r;
  690 + pxa3xx_gcu_reset(priv);
  691 + pxa3xx_gcu_init_debug_timer();
  692 +
  693 + dev_info(&dev->dev, "registered @0x%p, DMA 0x%p (%d bytes), IRQ %d\n",
  694 + (void *) r->start, (void *) priv->shared_phys,
  695 + SHARED_SIZE, irq);
  696 + return 0;
  697 +
  698 +err_put_clk:
  699 + clk_disable(priv->clk);
  700 + clk_put(priv->clk);
  701 +
  702 +err_free_dma:
  703 + dma_free_coherent(&dev->dev, SHARED_SIZE,
  704 + priv->shared, priv->shared_phys);
  705 +
  706 +err_free_io:
  707 + iounmap(priv->mmio_base);
  708 +
  709 +err_free_mem_region:
  710 + release_mem_region(r->start, resource_size(r));
  711 +
  712 +err_misc_deregister:
  713 + misc_deregister(&priv->misc_dev);
  714 +
  715 +err_free_priv:
  716 + platform_set_drvdata(dev, NULL);
  717 + free_buffers(dev, priv);
  718 + kfree(priv);
  719 + return ret;
  720 +}
  721 +
  722 +static int __devexit
  723 +pxa3xx_gcu_remove(struct platform_device *dev)
  724 +{
  725 + struct pxa3xx_gcu_priv *priv = platform_get_drvdata(dev);
  726 + struct resource *r = priv->resource_mem;
  727 +
  728 + pxa3xx_gcu_wait_idle(priv);
  729 +
  730 + misc_deregister(&priv->misc_dev);
  731 + dma_free_coherent(&dev->dev, SHARED_SIZE,
  732 + priv->shared, priv->shared_phys);
  733 + iounmap(priv->mmio_base);
  734 + release_mem_region(r->start, resource_size(r));
  735 + platform_set_drvdata(dev, NULL);
  736 + clk_disable(priv->clk);
  737 + free_buffers(dev, priv);
  738 + kfree(priv);
  739 +
  740 + return 0;
  741 +}
  742 +
/* Platform driver glue; binds to platform devices named DRV_NAME. */
static struct platform_driver pxa3xx_gcu_driver = {
	.probe	  = pxa3xx_gcu_probe,
	.remove	  = __devexit_p(pxa3xx_gcu_remove),
	.driver	  = {
		.owner	= THIS_MODULE,
		.name	= DRV_NAME,
	},
};
  751 +
  752 +static int __init
  753 +pxa3xx_gcu_init(void)
  754 +{
  755 + return platform_driver_register(&pxa3xx_gcu_driver);
  756 +}
  757 +
  758 +static void __exit
  759 +pxa3xx_gcu_exit(void)
  760 +{
  761 + platform_driver_unregister(&pxa3xx_gcu_driver);
  762 +}
  763 +
  764 +module_init(pxa3xx_gcu_init);
  765 +module_exit(pxa3xx_gcu_exit);
  766 +
  767 +MODULE_DESCRIPTION("PXA3xx graphics controller unit driver");
  768 +MODULE_LICENSE("GPL");
  769 +MODULE_ALIAS_MISCDEV(MISCDEV_MINOR);
  770 +MODULE_AUTHOR("Janine Kropp <nin@directfb.org>, "
  771 + "Denis Oliver Kropp <dok@directfb.org>, "
  772 + "Daniel Mack <daniel@caiaq.de>");
drivers/video/pxa3xx-gcu.h
  1 +#ifndef __PXA3XX_GCU_H__
  2 +#define __PXA3XX_GCU_H__
  3 +
  4 +#include <linux/types.h>
  5 +
  6 +/* Number of 32bit words in display list (ring buffer). */
  7 +#define PXA3XX_GCU_BUFFER_WORDS ((256 * 1024 - 256) / 4)
  8 +
  9 +/* To be increased when breaking the ABI */
  10 +#define PXA3XX_GCU_SHARED_MAGIC 0x30000001
  11 +
  12 +#define PXA3XX_GCU_BATCH_WORDS 8192
  13 +
/*
 * Data area shared between the kernel driver and its userspace
 * counterpart; the driver hands it out through mmap() at page offset 0.
 * The layout is part of the ABI guarded by PXA3XX_GCU_SHARED_MAGIC, so
 * fields must not be reordered or resized without bumping the magic.
 */
struct pxa3xx_gcu_shared {
	/* ring buffer of command words; the driver fills it from index 0
	 * every time it starts the hardware */
	u32 buffer[PXA3XX_GCU_BUFFER_WORDS];

	/* set by the driver when it starts the hardware, cleared by the
	 * interrupt handler when no more work is queued */
	bool hw_running;

	/* bus address of this area, written by the driver on reset */
	unsigned long buffer_phys;

	/* statistics, updated by the driver without atomicity guarantees */
	unsigned int num_words;		/* command words submitted via write() */
	unsigned int num_writes;	/* write() calls */
	unsigned int num_done;		/* EEOB interrupts processed */
	unsigned int num_interrupts;	/* interrupts handled in total */
	unsigned int num_wait_idle;	/* waits for hardware idle */
	unsigned int num_wait_free;	/* waits for a free batch buffer */
	unsigned int num_idle;		/* transitions to idle */

	/* set to PXA3XX_GCU_SHARED_MAGIC by the driver on reset */
	u32 magic;
};
  31 +
  32 +/* Initialization and synchronization.
  33 + * Hardware is started upon write(). */
  34 +#define PXA3XX_GCU_IOCTL_RESET _IO('G', 0)
  35 +#define PXA3XX_GCU_IOCTL_WAIT_IDLE _IO('G', 2)
  36 +
  37 +#endif /* __PXA3XX_GCU_H__ */