/*
  Copyright 2004 Feyd
  Copyright 2004, 2005 Jean-Baptiste Note

  This file is part of prism54usb.

  prism54usb is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  prism54usb is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with prism54usb; if not, write to the Free Software Foundation,
  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

*/

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
#include <linux/completion.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/usb.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
#include <linux/version.h>
#include <asm/uaccess.h>

#include "isl_38xx.h"
#include "islusb_dev.h"
#include "isl_sm.h"
#include "islsm_log.h"
#include "islusb_net2280.h"

/*
 * Compatibility shims for the kernel versions this driver builds against.
 *
 * URB_ASYNC_UNLINK is not needed on kernels >= 2.6.14; defining it to 0
 * there turns every "transfer_flags |= URB_ASYNC_UNLINK" in this file into
 * a no-op.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
#define URB_ASYNC_UNLINK 0
#endif

/*
 * Timeout passed to usb_bulk_msg(): 2000 on kernels >= 2.6.12 and 2 * HZ
 * on older ones (presumably two seconds either way, expressed in
 * milliseconds resp. jiffies -- confirm against the usb_bulk_msg()
 * documentation of the targeted kernel).
 *
 * The expansion is parenthesized so that the macro stays a single operand
 * wherever it is used in a larger expression.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
#define BULK_TIMEOUT   2000
#else
#define BULK_TIMEOUT   (2 * HZ)
#endif

/*
 * Pause for @ms milliseconds.
 *
 * Naming caveat: in Linux "sleep" sleeps and "delay" busy-waits, whereas
 * in BSD "delay" apparently sleeps.  Despite the "delay" in its name this
 * helper sleeps (it is a plain msleep()), so it must only be used where
 * sleeping is allowed.
 */
static inline void
p54u_mdelay(int ms)
{
	msleep(ms);
}

/*
 * Synchronously send @len bytes starting at @_data to bulk OUT endpoint
 * number @ep of the device behind @p54u.
 *
 * The payload is handed to islsm_txdata_debug() before the transfer.  The
 * call waits at most BULK_TIMEOUT for completion.
 *
 * Returns the usb_bulk_msg() result: 0 on success, otherwise an error code
 * (which is also logged).  The number of bytes actually transferred is not
 * reported to the caller.
 */
int
p54u_bulk_msg(struct p54u *p54u, unsigned int ep, void *_data, int len)
{
	struct usb_device *udev = p54u->usbdev;
	unsigned char *payload = _data;
	int transferred;
	int out_pipe;
	int status;

	islsm_txdata_debug(ep, payload, len);

	out_pipe = usb_sndbulkpipe(udev, ep);
	status = usb_bulk_msg(udev, out_pipe, payload, len, &transferred,
			      BULK_TIMEOUT);
	if (!status)
		return 0;

	printk(KERN_ERR "%s: bulk submit failed: %i\n", DRV_NAME, status);
	return status;
}

/*
 * Submit @urb asynchronously, with tx debugging.
 *
 * The urb's pipe, buffer and buffer length are passed to
 * islsm_txdata_debug() first.  URB_ASYNC_UNLINK is or'ed into the
 * transfer flags and actual_length is reset before the urb is handed to
 * usb_submit_urb() with the allocation @flags.
 *
 * Returns the usb_submit_urb() result; failures are logged at debug level.
 */
int
p54u_submit_urb(struct urb *urb, int flags)
{
	int status;

	islsm_txdata_debug(urb->pipe, urb->transfer_buffer,
			   urb->transfer_buffer_length);

	urb->actual_length = 0;
	urb->transfer_flags |= URB_ASYNC_UNLINK;

	status = usb_submit_urb(urb, flags);
	if (!status)
		return 0;

	islog(L_DEBUG, "%s: urb submit failed: %i\n", DRV_NAME, status);
	return status;
}

/* RX QUEUE MANAGEMENT */

/*
 * Forward declarations of helpers that are defined further down in this
 * file but are already needed by the queue code below.
 */

/* callback common to all bulk endpoints: counts the completion in the
 * queue and schedules the queue's tasklet */
static
void p54u_bulk_urb_callback(struct urb *urb, struct pt_regs *regs);
/* completion callback that does nothing; used for the descriptor urbs */
static
void p54u_nil_urb_callback(struct urb *urb, struct pt_regs *regs);
/* allocates a bulk urb bound to the queue's endpoint, with no buffer
 * attached yet */
static struct urb *
p54u_alloc_urb(struct usb_device *usbdev, struct p54u_pipe *queue);

/*
 * Map a struct p54u pointer to the net_device of the islsm that embeds it.
 *
 * @x is treated as the address of the "priv" member of a struct islsm;
 * container_of() steps back to the enclosing islsm, which is then handed
 * to NETDEV_OF_ISLSM().  A bit dirty, but, hey...
 *
 * The argument is parenthesized so that the cast applies to the whole
 * expression passed in, not just to its first operand.
 */
#define NETDEV_OF_P54U(x) (NETDEV_OF_ISLSM(container_of((void *)(x), struct islsm, priv)))

/*
 * Arm ring slot @cmd of RX @queue: give it a fresh receive skb and submit
 * its data urb.
 *
 * The data urb is allocated lazily if the slot does not have one yet.  The
 * slot is expected to hold no skb on entry (WARN_ON otherwise).
 *
 * Called by rx_queue_refill() with queue->lock held, hence the atomic
 * urb submission.
 *
 * Returns 0 on success, -ENOMEM if the urb or the skb cannot be allocated,
 * or the usb_submit_urb() error; in the latter case the skb is freed again
 * and the slot's skb pointer is cleared.
 */
static int
rx_queue_add(struct p54u_pipe *queue, usb_command_t *cmd)
{
	struct p54u *p54u = queue->p54u;
	struct sk_buff *rx_skb;
	struct urb *rx_urb;
	size_t rx_size;
	int status;

	/* realloc urb if needed */
	if (!cmd->data_urb)
		cmd->data_urb = p54u_alloc_urb(p54u->usbdev, queue);
	rx_urb = cmd->data_urb;
	if (!rx_urb)
		return -ENOMEM;

	WARN_ON(cmd->skb);

	rx_size = queue->buffer_size;
	rx_skb = dev_alloc_skb(rx_size);
	if (!rx_skb)
		return -ENOMEM;

	rx_skb->input_dev = NETDEV_OF_P54U(p54u);

	/* realign buffer: claim the whole headroom so that data starts at
	 * the head of the buffer, then reset the length to zero */
	skb_push(rx_skb, skb_headroom(rx_skb));
	skb_trim(rx_skb, 0);

	cmd->skb = rx_skb;
	rx_urb->transfer_buffer = rx_skb->data;
	rx_urb->transfer_buffer_length = rx_size;

	status = usb_submit_urb(rx_urb, GFP_ATOMIC);
	if (!status)
		return 0;

	islog(L_DEBUG, "could not submit rx urb : %i\n", status);
	dev_kfree_skb(rx_skb);
	cmd->skb = NULL;
	return status;
}

/*
 * Strip the leading SIZE_RX bytes from a received @skb.
 * Needed for ver1 devices (see rx_cmd()).
 */
static inline void
remove_net2280_header(struct sk_buff *skb)
{
	(void) skb_pull(skb, SIZE_RX);
}

/*
 * Dispatch one received @skb according to the device state.
 *
 *  - P54U_RUN: the frame goes to islsm_data_input(); on ver1 devices the
 *    net2280 header is removed first.
 *  - P54U_BOOT on a ver2 device: the frame goes to islsm_bootup_input().
 *  - anything else: the frame is dropped and freed.
 */
static void
rx_cmd(struct p54u *p54u, struct sk_buff *skb)
{
	if (p54u->state == P54U_RUN) {
		/* switch according to hardware type */
		if (p54u->device_version == P54U_DEVICE_VER1)
			remove_net2280_header(skb);
		islsm_data_input(skb);
		return;
	}

	if ((p54u->device_version == P54U_DEVICE_VER2) &&
	    (p54u->state == P54U_BOOT)) {
		islsm_bootup_input(skb);
		return;
	}

	islog(L_DEBUG,
	      "dropping packet received while not running\n");
	dev_kfree_skb(skb);
}

/*
 * Take the oldest completed receive buffer off RX @queue.
 *
 * Under queue->lock, the slot at the free index (queue->f) is consumed if
 * the completion counter (queue->c) is ahead of it: the skb is detached
 * from the slot and its length is set to the number of bytes the urb
 * actually received.
 *
 * Returns the skb, or NULL when there is nothing to dequeue or when the
 * urb of the consumed slot completed with a non-zero status.  In the
 * latter case the skb is freed here and *@err is set to -EIO; *@err is
 * left untouched otherwise, so the caller must initialize it.
 *
 * Earlier versions returned the already-freed skb pointer on error; NULL
 * is returned now so that no caller can ever touch freed memory.
 *
 * This could be replaced by skb_dequeue at some point.
 */
static struct sk_buff *
rx_queue_skb_dequeue(struct p54u_pipe *queue, int *err) {
	unsigned len = queue->len;
	usb_command_t *cmd;
	struct sk_buff *skb = NULL;
	struct urb *urb;
	unsigned long flags;

	/* after queue->f increase and spinlock release, the slot is
	   in another range and can be refilled at any moment by a
	   concurrent process -- so the skb is detached from the slot
	   before the lock is dropped.  This will be sufficient when
	   doing fully-dynamic allocation of urbs  */

	spin_lock_irqsave(&queue->lock,flags);

	if (queue->f >= queue->c)
		goto out;

	cmd = &queue->ringbuf[queue->f++ % len];

	skb = cmd->skb;
	cmd->skb = NULL;

	urb = cmd->data_urb;
	if (urb->status) {
		/* the transfer failed or was cancelled: drop the buffer */
		islog(L_DEBUG, "rx urb bad status : %i\n",
		      urb->status);
		dev_kfree_skb(skb);
		/* never hand a freed skb back to the caller */
		skb = NULL;
		*err = -EIO;
	} else {
		(void) skb_put(skb, urb->actual_length);
	}

 out:
	spin_unlock_irqrestore(&queue->lock,flags);

	return skb;
}

/*
 * Feed every completed receive buffer of @queue to rx_cmd().
 *
 * Processing stops when the queue has no more completed buffers or as
 * soon as a dequeue reports an error.
 *
 * Returns 0, or the error reported by rx_queue_skb_dequeue().
 */
static int
rx_queue_process(struct p54u *p54u, struct p54u_pipe *queue)
{
	struct sk_buff *rx_skb;
	int status = 0;

	/* for all acked packets in the queue */
	for (;;) {
		rx_skb = rx_queue_skb_dequeue(queue, &status);
		if (!rx_skb || status)
			break;
		rx_cmd(p54u, rx_skb);
	}

	return status;
}

/*
 * Reserve the next ring slot of @queue for refilling.
 *
 * A slot is available while the producer index (queue->p) is less than
 * one full ring length ahead of the free index (queue->f).  Reserving a
 * slot advances queue->p.
 *
 * Returns the reserved slot, or NULL when the ring is full.  No locking
 * is done here; rx_queue_refill() calls this with queue->lock held.
 */
static usb_command_t *
_rx_queue_cmd_refill(struct p54u_pipe *queue) {
	unsigned ring_len = queue->len;

	if (queue->p >= queue->f + ring_len)
		return NULL;

	return &queue->ringbuf[queue->p++ % ring_len];
}

/*
 * Re-arm every free slot of RX @queue with a fresh buffer and urb.
 *
 * Nothing is done (and 0 is returned) while the device is in the
 * P54U_SHUTDOWN state.  Otherwise slots are reserved and armed under
 * queue->lock until the ring is full or arming a slot fails.
 *
 * Returns 0 on success or the error reported by rx_queue_add().
 */
int
rx_queue_refill(struct p54u *p54u, struct p54u_pipe *queue)
{
	usb_command_t *cmd;
	int err = 0;
	unsigned long flags;

	if (p54u->state == P54U_SHUTDOWN) {
		islog(L_DEBUG, "not refilling queue %02x in shutdown state\n",
		      queue->addr);
		return 0;
	}

	spin_lock_irqsave(&queue->lock,flags);
	while ( (cmd = _rx_queue_cmd_refill(queue)) ) {
		err = rx_queue_add(queue, cmd);
		if (err) {
			/* Reserving the slot already advanced queue->p, but
			 * no urb is in flight for it.  Give the slot back:
			 * otherwise the completion counter can never catch
			 * up with queue->p and p54u_queue_destroy() would
			 * wait for this urb forever. */
			queue->p--;
			break;
		}
	}
	spin_unlock_irqrestore(&queue->lock,flags);

	return err;
}

/*
 * Tasklet handler of an RX queue; @data is the struct p54u_pipe pointer
 * registered in p54u_queue_init().
 *
 * Delivers the completed buffers and, if that went without error, re-arms
 * the ring.
 */
static void
p54u_rx_bh(unsigned long data)
{
	struct p54u_pipe *queue = (void *)data;
	struct p54u *p54u = queue->p54u;

	if (rx_queue_process(p54u, queue))
		return;

	rx_queue_refill(p54u, queue);
}

/*
 * Unlink the urbs of the ring slot that queue position @index maps to:
 * the descriptor urb if the slot has one, then the data urb.
 */
static inline void
rx_queue_cancel(struct p54u_pipe *queue, unsigned int index)
{
	usb_command_t *slot = &queue->ringbuf[index % queue->len];

	if (slot->descr_urb)
		usb_unlink_urb(slot->descr_urb);
	usb_unlink_urb(slot->data_urb);
}

/*
 * Cancel every urb of @queue that has been submitted but has not
 * completed yet, i.e. the positions between the completion counter
 * (queue->c) and the producer index (queue->p).  Runs under queue->lock.
 */
static void
rx_queue_empty(struct p54u_pipe *queue)
{
	unsigned long irqflags;
	unsigned int pos;
	unsigned int stop;

	spin_lock_irqsave(&queue->lock,irqflags);
	pos = queue->c;
	stop = queue->p;
	for (; pos < stop; pos++)
		rx_queue_cancel(queue, pos);
	spin_unlock_irqrestore(&queue->lock,irqflags);
}

/* TODO : queue unalloc function */

/* TX QUEUE MANAGEMENT */

/*
 * Allocate a bulk data urb for @queue.
 *
 * The urb is bound to the queue's endpoint and to
 * p54u_bulk_urb_callback(), with @queue as completion context.  No
 * transfer buffer is attached yet; the buffer length is preset to the
 * queue's buffer size.
 *
 * The allocation uses GFP_ATOMIC: besides queue initialization, this is
 * also reached from rx_queue_add(), which runs with the queue lock held.
 *
 * Returns the urb, or NULL if the allocation failed.
 */
static struct urb *
p54u_alloc_urb(struct usb_device *usbdev, struct p54u_pipe *queue)
{
	struct urb *urb = usb_alloc_urb(0, GFP_ATOMIC);

	if (urb) {
		urb->transfer_flags |= URB_ASYNC_UNLINK;
		usb_fill_bulk_urb(urb, usbdev, queue->endp, NULL,
				  queue->buffer_size,
				  p54u_bulk_urb_callback, queue);
	}

	return urb;
}

/*
 * Allocate the descriptor urb of a TX slot together with its 16-byte
 * DMA-consistent buffer (usb_buffer_alloc()).
 *
 * The urb completes into p54u_nil_urb_callback().  What it targets
 * depends on the device version:
 *  - ver1: the P54U_PIPE_DEV bulk OUT endpoint, 16 bytes;
 *  - otherwise: the queue's own endpoint, 4 bytes.
 *
 * TODO : fix size for version 2 devices
 *
 * Returns the urb, or NULL if the urb or its buffer cannot be allocated.
 */
static struct urb *
p54u_alloc_descr_urb(struct p54u *p54u, struct p54u_pipe *queue)
{
	struct usb_device *usbdev = p54u->usbdev;
	unsigned int alloc_size = 16;
	struct urb *urb;
	void *descr;

	urb = usb_alloc_urb(0, GFP_KERNEL);
	if (!urb)
		return NULL;

	/* the buffer comes with its own DMA mapping */
	urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP | URB_ASYNC_UNLINK;

	descr = usb_buffer_alloc(usbdev, alloc_size, GFP_KERNEL,
				 &urb->transfer_dma);
	if (!descr) {
		islog(L_DEBUG, "could not allocate tx descr buf\n");
		usb_free_urb(urb);
		return NULL;
	}

	/* the descr urb is very different from one device to the
	 * other */
	if (p54u->device_version != P54U_DEVICE_VER1) {
		usb_fill_bulk_urb(urb, usbdev, queue->endp,
				  descr, 4, p54u_nil_urb_callback, NULL);
	} else {
		int dev_pipe = usb_sndbulkpipe(usbdev,
					       P54U_PIPE_DEV &
					       USB_ENDPOINT_NUMBER_MASK);
		usb_fill_bulk_urb(urb, usbdev, dev_pipe, descr, 16,
				  p54u_nil_urb_callback, NULL);
	}

	return urb;
}

/*
 * Allocate the descriptor urb and the data urb of TX ring slot @index.
 * This function is only called during initialization of the queue.
 *
 * Returns 0 on success, -ENOMEM if either urb cannot be allocated.
 *
 * FIXME: when the data urb allocation fails, the descriptor urb that was
 * just allocated is not released here; it stays referenced by the slot
 * (descr_urb), so it is only reclaimed if the ring is later torn down.
 */
static int
tx_queue_add(struct p54u *p54u, struct p54u_pipe *queue, unsigned int index)
{
	struct usb_device *usbdev = p54u->usbdev;
	usb_command_t *slot = &queue->ringbuf[index];
	struct urb *urb;

	/* alloc needed urbs */
	urb = p54u_alloc_descr_urb(p54u, queue);
	if (!urb)
		return -ENOMEM;
	slot->descr_urb = urb;

	urb = p54u_alloc_urb(usbdev, queue);
	if (!urb)
		return -ENOMEM;
	slot->data_urb = urb;

	return 0;
}

/*
 * Release the skbs of all TX slots whose urbs have completed, i.e. the
 * positions from the free index (queue->f) up to the completion counter
 * (queue->c) sampled on entry.  queue->f is advanced accordingly.
 *
 * No locking is done here; p54u_tx_bh() calls this with queue->lock held.
 */
static void
tx_queue_process(struct p54u_pipe *queue)
{
	unsigned long completed = queue->c;

	/* for all acked packets in the queue */
	BUG_ON(queue->f > completed);
	for (; queue->f < completed; queue->f++) {
		unsigned int slot = queue->f % queue->len;
		struct sk_buff *sent = queue->ringbuf[slot].skb;

		queue->ringbuf[slot].skb = NULL;
		/* FIXME : the skb must be successfull for this to be
		   called */
		islsm_txskb_free(sent);
	}
}

/*
 * Tasklet handler of a TX queue; @data is the struct p54u_pipe pointer
 * registered in p54u_queue_init().  Reclaims completed slots under the
 * queue lock.
 *
 * This is only a stub now -- get rid of it
 */
static void
p54u_tx_bh(unsigned long data)
{
	struct p54u_pipe *queue = (void *)data;
	unsigned long irqflags;

	spin_lock_irqsave(&queue->lock,irqflags);
	tx_queue_process(queue);
	spin_unlock_irqrestore(&queue->lock,irqflags);
}

/* QUEUE MANAGEMENT */
/* Agnostic functions */
/*
 * Completion callback shared by the data urbs of all queues.
 *
 * The queue is taken from the urb context; its completion counter is
 * incremented and the queue tasklet is scheduled to do the actual work.
 * @regs is unused.
 *
 * TODO : the counter is updated without the queue lock and not
 * atomically.  Check in what context the callback is called, and whether
 * it is serialized against other callbacks on the same endpoint.
 */
static void
p54u_bulk_urb_callback(struct urb *urb, struct pt_regs *regs)
{
	struct p54u_pipe *pipe = urb->context;

	pipe->c++;
	tasklet_schedule(&pipe->task);
}

/*
 * Completion callback that deliberately does nothing; installed on the
 * descriptor urbs by p54u_alloc_descr_urb().
 */
static void
p54u_nil_urb_callback(struct urb *urb, struct pt_regs *regs)
{
}

/* only these are exported */

/*
 * Initialize @queue for the bulk endpoint described by @desc.
 *
 * The endpoint address and interval are copied from the descriptor, the
 * pipe and the tasklet are set up according to the endpoint direction
 * (IN: p54u_rx_bh, OUT: p54u_tx_bh), the indices are reset and a zeroed
 * ring of P54U_QUEUE_LEN slots is allocated.  For an OUT endpoint the
 * urbs of every slot are allocated right away, because this is better
 * done here in interruptible context, whereas submission will be done in
 * atomic context.
 *
 * Returns 0 on success or -ENOMEM if the ring or any of the TX urbs
 * cannot be allocated.  When the urb allocation fails, the ring and the
 * urbs allocated so far stay attached to @queue; p54u_queue_destroy()
 * releases them.  (Previously such a failure was silently ignored, which
 * left slots without urbs for tx_queue_submit() to dereference.)
 */
int
p54u_queue_init(struct p54u *p54u,
		struct usb_endpoint_descriptor *desc, struct p54u_pipe *queue)
{
	struct usb_device *usbdev = p54u->usbdev;
	unsigned int i;
	int err;

	/* some needed fields init. Maybe I should just have the desc
	 * saved instead of copying all its fields. */
	/* please note : investigate what this wMaxPacketSize is worth
	 * and _use_it_ instead of a P54U_MAX_bidule
	 * -- it's worth nothing
	 *
	 */
	queue->addr = desc->bEndpointAddress;
	queue->interval = desc->bInterval;

	queue->p54u = p54u;
	queue->len = P54U_QUEUE_LEN;
	queue->buffer_size = P54U_MAX_FRAME_SIZE;

	if (queue->addr & USB_DIR_IN) {
		queue->endp =
			usb_rcvbulkpipe(usbdev,
					queue->
					addr & USB_ENDPOINT_NUMBER_MASK);
		tasklet_init(&queue->task, p54u_rx_bh,
			     (unsigned long)queue);
	} else {
		queue->endp =
			usb_sndbulkpipe(usbdev,
					queue->
					addr & USB_ENDPOINT_NUMBER_MASK);
		tasklet_init(&queue->task, p54u_tx_bh,
			     (unsigned long)queue);
	}

	spin_lock_init(&queue->lock);

	/* queue proper init */
	queue->f = queue->c = queue->p = 0;

	/* initialize arrays */
	queue->ringbuf =
	    kmalloc(sizeof (*queue->ringbuf) * queue->len, GFP_KERNEL);

	if (queue->ringbuf == 0)
		return -ENOMEM;

	/* zero out the thing */
	memset(queue->ringbuf, 0, sizeof (*queue->ringbuf) * queue->len);

	/* RX slots get their urbs lazily, when they are first armed */
	if (queue->addr & USB_DIR_IN)
		return 0;

	for (i = 0; i < queue->len; i++) {
		err = tx_queue_add(p54u, queue, i);
		if (err)
			return err;
	}

	return 0;
}

/*
 * Tear down @queue and release its urbs, skbs and ring.
 *
 * Does nothing if @queue is NULL or has no ring.  Otherwise:
 *  1. all urbs still in flight are unlinked;
 *  2. the function waits until every submitted urb has completed
 *     (completion counter c has caught up with the producer index p);
 *  3. it then waits until the tasklet has consumed every completion
 *     (free index f has caught up with c), rescheduling the tasklet as
 *     needed;
 *  4. the tasklet is killed and every slot's urbs and skb are freed.
 *
 * Both waits sleep between polls, so this must be called from a context
 * that may sleep.
 *
 * NOTE(review): the transfer buffers that p54u_alloc_descr_urb() obtains
 * from usb_buffer_alloc() are not released here -- TODO.
 */
void
p54u_queue_destroy(struct p54u_pipe *queue)
{
	unsigned int i;

	if (!queue || !(queue->ringbuf))
		return;

	/* actually works for tx queues */
	rx_queue_empty(queue);

	/* wait for the urb to become free. Maybe i shouldn't set the
	 * async unlink flag ? */
	BUG_ON(queue->c > queue->p);
	while (queue->c < queue->p) {
		islog(L_DEBUG,
		      "waiting for urbs in [%i %i[, queue %02x to return\n",
		      queue->c, queue->p, queue->addr);
		p54u_mdelay(10);
	}

	/* wait for the last bh to do its job */
	/* upping f will happen in the bh */
	BUG_ON(queue->f > queue->c);
	while (queue->f < queue->c) {
		islog(L_DEBUG,
		      "waiting for urbs in [%i %i[, queue %02x to be bh'd\n",
		      queue->f, queue->c, queue->addr);
		tasklet_schedule(&queue->task);
		/* give the tasklet a chance to run instead of spinning
		 * here and flooding the log while it is still pending */
		p54u_mdelay(10);
	}

	tasklet_kill(&queue->task);

	/* free the alloc'd urbs. This is simple,
	   usb_free_urb and dev_kfree_skb can be called on nulls. */
	for (i = 0; i < queue->len; i++) {
		usb_command_t *cmd = &queue->ringbuf[i];
		usb_free_urb(cmd->data_urb);
		usb_free_urb(cmd->descr_urb);
		if (cmd->skb)
			dev_kfree_skb(cmd->skb);
	}

	kfree(queue->ringbuf);
	queue->ringbuf = NULL;
}

/*
 * Submit @skb through slot @command using the LM87 transport on the
 * G3887: the address is not a separate packet, it is prepended to the
 * frame as a little-endian 32-bit word.
 *
 * Returns the result of submitting the data urb.  Note that the address
 * word stays prepended to @skb even when the submission fails.
 *
 * TODO: move the usb_command_t into the control block -- and only queue
 * skbs
 */
static int
tx_queue_submit_lm87_3887(usb_command_t *command, struct sk_buff *skb)
{
	u32 lmac_addr = LMAC_ADDR_OF_SKB(skb);
	struct urb *urb = command->data_urb;
	u32 *addr_word;

	/* make room for the address in front of the payload */
	addr_word = (u32 *)skb_push(skb, sizeof(u32));
	*addr_word = cpu_to_le32(lmac_addr);

	/* prepare the data urb */
	urb->transfer_buffer = skb->data;
	urb->transfer_buffer_length = skb->len;

	/* submit the urbs, for now with debug */
	return p54u_submit_urb(urb, GFP_ATOMIC);
}

/*
 * Submit @skb through slot @command using the LM86 transport on the
 * G3887: the address is a separate packet, sent as a 4-byte
 * little-endian word through the descriptor urb, followed by the frame
 * itself in the data urb.
 *
 * Returns 0 if both urbs were submitted, otherwise the error of the
 * first submission that failed; the data urb is not submitted when the
 * descriptor urb could not be.
 */
static int
tx_queue_submit_lm86_3887(usb_command_t *command, struct sk_buff *skb)
{
	u32 lmac_addr = LMAC_ADDR_OF_SKB(skb);
	struct urb *data = command->data_urb;
	struct urb *descr = command->descr_urb;
	int status;

	/* prepare the address URB */
	*((u32 *) descr->transfer_buffer) = cpu_to_le32(lmac_addr);
	/* should be taken care of during allocation */
	descr->transfer_buffer_length = 4;

	/* prepare the data URB */
	data->transfer_buffer = skb->data;
	data->transfer_buffer_length = skb->len;

	/* submit the urbs, for now with debug */
	status = p54u_submit_urb(descr, GFP_ATOMIC);
	if (status)
		return status;

	return p54u_submit_urb(data, GFP_ATOMIC);
}

/*
 * Submit @skb through slot @command using the LM86 transport on the
 * 3886 + net2280: the data packet is announced by an interrupt request,
 * i.e. a register write (sent through the descriptor urb) that sets the
 * interrupt bit matching the data urb's endpoint.
 *
 * The frame is zero-padded at its tail to a multiple of 4 bytes before
 * it is submitted.
 *
 * Returns -EIO if the data urb targets neither the data nor the
 * management endpoint (nothing is modified in that case), otherwise 0 or
 * the error of the first urb submission that failed.
 */
static int
tx_queue_submit_lm86_3886(usb_command_t *command, struct sk_buff *skb)
{
	struct urb *data = command->data_urb;
	struct urb *descr = command->descr_urb;
	struct net2280_reg_write irq_write;
	unsigned int ep;
	unsigned int tail;
	int irq_bit;
	int status;

	/* pick the interrupt to raise from the target endpoint */
	ep = usb_pipeendpoint(data->pipe);
	if (ep == P54U_PIPE_DATA)
		irq_bit = ISL38XX_DEV_INT_DATA;
	else if (ep == P54U_PIPE_MGMT)
		irq_bit = ISL38XX_DEV_INT_MGMT;
	else
		return -EIO;

	/* prepare the interrupt URB */
	irq_write.port = cpu_to_le16(P54U_PORT_DEV_U32);
	irq_write.addr = cpu_to_le32(P54U_DEV_BASE | ISL38XX_DEV_INT_REG);
	irq_write.val = cpu_to_le32(irq_bit);

	memcpy(descr->transfer_buffer, &irq_write, sizeof (irq_write));
	descr->transfer_buffer_length = sizeof (irq_write);

	/* Additionally, the USB needs to be padded to 32-bits width. I
	   don't know what happens for the CRC in lm87 mode -- must it
	   be changed ?	*/
	tail = skb->len & 3;
	if (tail) {
		/* FIXME : is there enough room ? */
		unsigned int pad = 4 - tail;
		void *fill = skb_put(skb, pad);
		memset(fill, 0, pad);
	}

	data->transfer_buffer = skb->data;
	data->transfer_buffer_length = skb->len;

	status = p54u_submit_urb(descr, GFP_ATOMIC);
	if (status)
		return status;

	return p54u_submit_urb(data, GFP_ATOMIC);
}

/*
 * Queue @skb for transmission on TX @queue.
 *
 * The frame is refused with -EIO unless the device is in the P54U_RUN
 * state.  Otherwise, under queue->lock, the next ring slot is picked and
 * the frame is handed to the submit routine matching the device version
 * and, for ver2 devices, the firmware type.  On success the skb is
 * recorded in the slot and the producer index advances.
 *
 * Returns 0 on success, -EIO when not running, -ENOSPC when the ring is
 * full, -EINVAL for an unknown device version or firmware type, or the
 * error of the underlying submit routine.
 *
 * TODO: when tx path has been simplified, this function will be
 * duplicated into the chip-specific files, so that we don't have to
 * case here against the device version -- which is completely useless,
 * as this is called from device-specific callbacks...
 */
int
tx_queue_submit(struct p54u_pipe *queue, struct sk_buff *skb)
{
	struct islsm *islsm = ISLSM_OF_NETDEV(skb->dev);
	struct p54u *p54u = P54U_OF_ISLSM(islsm);
	usb_command_t *slot;
	unsigned long irqflags;
	int status;

	if (p54u->state != P54U_RUN) {
		islog(L_DEBUG, "dismissed packet sending attempt while not running\n");
		return -EIO;
	}

	/* Lock until submitted, to make sure urb submit is in correct
	 * order wrt the queue */
	spin_lock_irqsave(&queue->lock, irqflags);

	BUG_ON(queue->p > queue->f + queue->len);
	if (queue->p == queue->f + queue->len) {
		/* every slot is still in use */
		status = -ENOSPC;
		goto unlock;
	}

	slot = &queue->ringbuf[queue->p % queue->len];

	switch (p54u->device_version) {
	case P54U_DEVICE_VER1 :
		status = tx_queue_submit_lm86_3886(slot, skb);
		break;
	case P54U_DEVICE_VER2 :
		if (islsm->fw_type == ISLSM_FW_LM87)
			status = tx_queue_submit_lm87_3887(slot, skb);
		else if (islsm->fw_type == ISLSM_FW_LM86)
			status = tx_queue_submit_lm86_3887(slot, skb);
		else
			status = -EINVAL;
		break;
	default:
		status = -EINVAL;
	}

	if (!status) {
		slot->skb = skb;
		queue->p++;
	}

 unlock:
	spin_unlock_irqrestore(&queue->lock, irqflags);
	return status;
}
