Commit 8d49751580db804a02caf6a5b7cebe2ff26c0d7e

Authored by Caz Yokoyama
Committed by Greg Kroah-Hartman
1 parent 2141c7c5ee

Sample Implementation of Intel MIC User Space Daemon.

This patch introduces a sample user space daemon which
implements the virtio device backends on the host. The daemon
creates/removes/configures virtio device backends by communicating with
the Intel MIC Host Driver. The virtio devices currently supported are
virtio net, virtio console and virtio block. Virtio net supports TSO/GSO.
The daemon also monitors card shutdown status and takes appropriate actions
like killing the virtio backends and resetting the card upon card shutdown
and crashes.

Co-author: Ashutosh Dixit <ashutosh.dixit@intel.com>
Co-author: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Caz Yokoyama <Caz.Yokoyama@intel.com>
Signed-off-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
Signed-off-by: Nikhil Rao <nikhil.rao@intel.com>
Signed-off-by: Harshavardhan R Kharche <harshavardhan.r.kharche@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Acked-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 8 changed files with 2347 additions and 0 deletions Side-by-side Diff

Documentation/mic/mic_overview.txt
  1 +An Intel MIC X100 device is a PCIe form factor add-in coprocessor
  2 +card based on the Intel Many Integrated Core (MIC) architecture
  3 +that runs a Linux OS. It is a PCIe endpoint in a platform and therefore
  4 +implements the three required standard address spaces i.e. configuration,
  5 +memory and I/O. The host OS loads a device driver as is typical for
  6 +PCIe devices. The card itself runs a bootstrap after reset that
  7 +transfers control to the card OS downloaded from the host driver.
  8 +The card OS as shipped by Intel is a Linux kernel with modifications
  9 +for the X100 devices.
  10 +
  11 +Since it is a PCIe card, it does not have the ability to host hardware
  12 +devices for networking, storage and console. We provide these devices
  13 +on X100 coprocessors thus enabling a self-bootable equivalent environment
  14 +for applications. A key benefit of our solution is that it leverages
  15 +the standard virtio framework for network, disk and console devices,
  16 +though in our case the virtio framework is used across a PCIe bus.
  17 +
  18 +Here is a block diagram of the various components described above. The
  19 +virtio backends are situated on the host rather than the card given better
  20 +single threaded performance for the host compared to MIC, the ability of
  21 +the host to initiate DMAs to/from the card using the MIC DMA engine and
  22 +the fact that the virtio block storage backend can only be on the host.
  23 +
  24 +                                  |
  25 +       +----------+               |             +----------+
  26 +       | Card OS  |               |             | Host OS  |
  27 +       +----------+               |             +----------+
  28 +                                  |
  29 ++-------+ +--------+ +------+     | +---------+ +--------+ +--------+
  30 +| Virtio| |Virtio  | |Virtio|     | |Virtio   | |Virtio  | |Virtio  |
  31 +| Net   | |Console | |Block |     | |Net      | |Console | |Block   |
  32 +| Driver| |Driver  | |Driver|     | |backend  | |backend | |backend |
  33 ++-------+ +--------+ +------+     | +---------+ +--------+ +--------+
  34 +    |         |         |         |      |            |         |
  35 +    |         |         |         |User  |            |         |
  36 +    |         |         |         |------|------------|---------|-------
  37 +    +-------------------+         |Kernel +--------------------------+
  38 +              |                   |       | Virtio over PCIe IOCTLs  |
  39 +              |                   |       +--------------------------+
  40 +      +--------------+            |                    |
  41 +      |Intel MIC     |            |            +---------------+
  42 +      |Card Driver   |            |            |Intel MIC      |
  43 +      +--------------+            |            |Host Driver    |
  44 +              |                   |            +---------------+
  45 +              |                   |                    |
  46 +     +-------------------------------------------------------------+
  47 +     |                                                             |
  48 +     |                          PCIe Bus                           |
  49 +     +-------------------------------------------------------------+
Documentation/mic/mpssd/.gitignore
  1 +mpssd
Documentation/mic/mpssd/Makefile
  1 +#
  2 +# Makefile - Intel MIC User Space Tools.
  3 +# Copyright(c) 2013, Intel Corporation.
  4 +#
  5 +ifdef DEBUG
  6 +CFLAGS += $(USERWARNFLAGS) -I. -g -Wall -DDEBUG=$(DEBUG)
  7 +else
  8 +CFLAGS += $(USERWARNFLAGS) -I. -g -Wall
  9 +endif
  10 +
  11 +mpssd: mpssd.o sysfs.o
  12 + $(CC) $(CFLAGS) -o $@ $^ -lpthread
  13 +
  14 +install:
  15 + install mpssd /usr/sbin/mpssd
  16 + install micctrl /usr/sbin/micctrl
  17 +
  18 +clean:
  19 + rm -f mpssd *.o
Documentation/mic/mpssd/micctrl
  1 +#!/bin/bash
  2 +# Intel MIC Platform Software Stack (MPSS)
  3 +#
  4 +# Copyright(c) 2013 Intel Corporation.
  5 +#
  6 +# This program is free software; you can redistribute it and/or modify
  7 +# it under the terms of the GNU General Public License, version 2, as
  8 +# published by the Free Software Foundation.
  9 +#
  10 +# This program is distributed in the hope that it will be useful, but
  11 +# WITHOUT ANY WARRANTY; without even the implied warranty of
  12 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 +# General Public License for more details.
  14 +#
  15 +# The full GNU General Public License is included in this distribution in
  16 +# the file called "COPYING".
  17 +#
  18 +# Intel MIC User Space Tools.
  19 +#
  20 +# micctrl - Controls MIC boot/start/stop.
  21 +#
  22 +# chkconfig: 2345 95 05
  23 +# description: start MPSS stack processing.
  24 +#
  25 +### BEGIN INIT INFO
  26 +# Provides: micctrl
  27 +### END INIT INFO
  28 +
  29 +# Source function library.
  30 +. /etc/init.d/functions
  31 +
  32 +sysfs="/sys/class/mic"
  33 +
  34 +_status()
  35 +{
  36 + f=$sysfs/$1
  37 + echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`"
  38 +}
  39 +
  40 +status()
  41 +{
  42 + if [ "`echo $1 | head -c3`" == "mic" ]; then
  43 + _status $1
  44 + return $?
  45 + fi
  46 + for f in $sysfs/*
  47 + do
  48 + _status `basename $f`
  49 + RETVAL=$?
  50 + [ $RETVAL -ne 0 ] && return $RETVAL
  51 + done
  52 + return 0
  53 +}
  54 +
  55 +_reset()
  56 +{
  57 + f=$sysfs/$1
  58 + echo reset > $f/state
  59 +}
  60 +
  61 +reset()
  62 +{
  63 + if [ "`echo $1 | head -c3`" == "mic" ]; then
  64 + _reset $1
  65 + return $?
  66 + fi
  67 + for f in $sysfs/*
  68 + do
  69 + _reset `basename $f`
  70 + RETVAL=$?
  71 + [ $RETVAL -ne 0 ] && return $RETVAL
  72 + done
  73 + return 0
  74 +}
  75 +
  76 +_boot()
  77 +{
  78 + f=$sysfs/$1
  79 + echo "linux" > $f/bootmode
  80 + echo "mic/uos.img" > $f/firmware
  81 + echo "mic/$1.image" > $f/ramdisk
  82 + echo "boot" > $f/state
  83 +}
  84 +
  85 +boot()
  86 +{
  87 + if [ "`echo $1 | head -c3`" == "mic" ]; then
  88 + _boot $1
  89 + return $?
  90 + fi
  91 + for f in $sysfs/*
  92 + do
  93 + _boot `basename $f`
  94 + RETVAL=$?
  95 + [ $RETVAL -ne 0 ] && return $RETVAL
  96 + done
  97 + return 0
  98 +}
  99 +
  100 +_shutdown()
  101 +{
  102 + f=$sysfs/$1
  103 + echo shutdown > $f/state
  104 +}
  105 +
  106 +shutdown()
  107 +{
  108 + if [ "`echo $1 | head -c3`" == "mic" ]; then
  109 + _shutdown $1
  110 + return $?
  111 + fi
  112 + for f in $sysfs/*
  113 + do
  114 + _shutdown `basename $f`
  115 + RETVAL=$?
  116 + [ $RETVAL -ne 0 ] && return $RETVAL
  117 + done
  118 + return 0
  119 +}
  120 +
  121 +_wait()
  122 +{
  123 + f=$sysfs/$1
  124 + while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ]
  125 + do
  126 + sleep 1
  127 + echo -e "Waiting for $1 to go offline"
  128 + done
  129 +}
  130 +
  131 +wait()
  132 +{
  133 + if [ "`echo $1 | head -c3`" == "mic" ]; then
  134 + _wait $1
  135 + return $?
  136 + fi
  137 + # Wait for the cards to go offline
  138 + for f in $sysfs/*
  139 + do
  140 + _wait `basename $f`
  141 + RETVAL=$?
  142 + [ $RETVAL -ne 0 ] && return $RETVAL
  143 + done
  144 + return 0
  145 +}
  146 +
  147 +if [ ! -d "$sysfs" ]; then
  148 + echo -e $"Module unloaded "
  149 + exit 3
  150 +fi
  151 +
  152 +case $1 in
  153 + -s)
  154 + status $2
  155 + ;;
  156 + -r)
  157 + reset $2
  158 + ;;
  159 + -b)
  160 + boot $2
  161 + ;;
  162 + -S)
  163 + shutdown $2
  164 + ;;
  165 + -w)
  166 + wait $2
  167 + ;;
  168 + *)
  169 + echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}"
  170 + exit 2
  171 +esac
  172 +
  173 +exit $?
Documentation/mic/mpssd/mpss
  1 +#!/bin/bash
  2 +# Intel MIC Platform Software Stack (MPSS)
  3 +#
  4 +# Copyright(c) 2013 Intel Corporation.
  5 +#
  6 +# This program is free software; you can redistribute it and/or modify
  7 +# it under the terms of the GNU General Public License, version 2, as
  8 +# published by the Free Software Foundation.
  9 +#
  10 +# This program is distributed in the hope that it will be useful, but
  11 +# WITHOUT ANY WARRANTY; without even the implied warranty of
  12 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 +# General Public License for more details.
  14 +#
  15 +# The full GNU General Public License is included in this distribution in
  16 +# the file called "COPYING".
  17 +#
  18 +# Intel MIC User Space Tools.
  19 +#
  20 +# mpss Start mpssd.
  21 +#
  22 +# chkconfig: 2345 95 05
  23 +# description: start MPSS stack processing.
  24 +#
  25 +### BEGIN INIT INFO
  26 +# Provides: mpss
  27 +# Required-Start:
  28 +# Required-Stop:
  29 +# Short-Description: MPSS stack control
  30 +# Description: MPSS stack control
  31 +### END INIT INFO
  32 +
  33 +# Source function library.
  34 +. /etc/init.d/functions
  35 +
  36 +exec=/usr/sbin/mpssd
  37 +sysfs="/sys/class/mic"
  38 +
  39 +start()
  40 +{
  41 + [ -x $exec ] || exit 5
  42 +
  43 + if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then
  44 + echo -e $"MPSSD already running! "
  45 + success
  46 + echo
  47 + return 0
  48 + fi
  49 +
  50 + echo -e $"Starting MPSS Stack"
  51 + echo -e $"Loading MIC_HOST Module"
  52 +
  53 + # Ensure the driver is loaded
  54 + if [ ! -d "$sysfs" ]; then
  55 + modprobe mic_host
  56 + RETVAL=$?
  57 + if [ $RETVAL -ne 0 ]; then
  58 + failure
  59 + echo
  60 + return $RETVAL
  61 + fi
  62 + fi
  63 +
  64 + # Start the daemon
  65 + echo -n $"Starting MPSSD "
  66 + $exec
  67 + RETVAL=$?
  68 + if [ $RETVAL -ne 0 ]; then
  69 + failure
  70 + echo
  71 + return $RETVAL
  72 + fi
  73 + success
  74 + echo
  75 +
  76 + sleep 5
  77 +
  78 + # Boot the cards
  79 + micctrl -b
  80 +
  81 + # Wait till ping works
  82 + for f in $sysfs/*
  83 + do
  84 + count=100
  85 + ipaddr=`cat $f/cmdline`
  86 + ipaddr=${ipaddr#*address,}
  87 + ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1`
  88 + while [ $count -ge 0 ]
  89 + do
  90 + echo -e "Pinging "`basename $f`" "
  91 + ping -c 1 $ipaddr &> /dev/null
  92 + RETVAL=$?
  93 + if [ $RETVAL -eq 0 ]; then
  94 + success
  95 + break
  96 + fi
  97 + sleep 1
  98 + count=`expr $count - 1`
  99 + done
  100 + [ $RETVAL -ne 0 ] && failure || success
  101 + echo
  102 + done
  103 + return $RETVAL
  104 +}
  105 +
  106 +stop()
  107 +{
  108 + echo -e $"Shutting down MPSS Stack: "
  109 +
  110 + # Bail out if module is unloaded
  111 + if [ ! -d "$sysfs" ]; then
  112 + echo -n $"Module unloaded "
  113 + success
  114 + echo
  115 + return 0
  116 + fi
  117 +
  118 + # Shut down the cards.
  119 + micctrl -S
  120 +
  121 + # Wait for the cards to go offline
  122 + for f in $sysfs/*
  123 + do
  124 + while [ "`cat $f/state`" != "offline" ]
  125 + do
  126 + sleep 1
  127 + echo -e "Waiting for "`basename $f`" to go offline"
  128 + done
  129 + done
  130 +
  131 + # Display the status of the cards
  132 + micctrl -s
  133 +
  134 + # Kill MPSSD now
  135 + echo -n $"Killing MPSSD"
  136 + killall -9 mpssd 2>/dev/null
  137 + RETVAL=$?
  138 + [ $RETVAL -ne 0 ] && failure || success
  139 + echo
  140 + return $RETVAL
  141 +}
  142 +
  143 +restart()
  144 +{
  145 + stop
  146 + sleep 5 # pause between stopping and starting the stack
  147 + start
  148 +}
  149 +
  150 +status()
  151 +{
  152 + micctrl -s
  153 + if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then
  154 + echo "mpssd is running"
  155 + else
  156 + echo "mpssd is stopped"
  157 + fi
  158 + return 0
  159 +}
  160 +
  161 +unload()
  162 +{
  163 + if [ ! -d "$sysfs" ]; then
  164 + echo -n $"No MIC_HOST Module: "
  165 + success
  166 + echo
  167 + return
  168 + fi
  169 +
  170 + stop
  171 +
  172 + sleep 5
  173 + echo -n $"Removing MIC_HOST Module: "
  174 + modprobe -r mic_host
  175 + RETVAL=$?
  176 + [ $RETVAL -ne 0 ] && failure || success
  177 + echo
  178 + return $RETVAL
  179 +}
  180 +
  181 +case $1 in
  182 + start)
  183 + start
  184 + ;;
  185 + stop)
  186 + stop
  187 + ;;
  188 + restart)
  189 + restart
  190 + ;;
  191 + status)
  192 + status
  193 + ;;
  194 + unload)
  195 + unload
  196 + ;;
  197 + *)
  198 + echo $"Usage: $0 {start|stop|restart|status|unload}"
  199 + exit 2
  200 +esac
  201 +
  202 +exit $?
Documentation/mic/mpssd/mpssd.c
Changes suppressed. Click to show
  1 +/*
  2 + * Intel MIC Platform Software Stack (MPSS)
  3 + *
  4 + * Copyright(c) 2013 Intel Corporation.
  5 + *
  6 + * This program is free software; you can redistribute it and/or modify
  7 + * it under the terms of the GNU General Public License, version 2, as
  8 + * published by the Free Software Foundation.
  9 + *
  10 + * This program is distributed in the hope that it will be useful, but
  11 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 + * General Public License for more details.
  14 + *
  15 + * The full GNU General Public License is included in this distribution in
  16 + * the file called "COPYING".
  17 + *
  18 + * Intel MIC User Space Tools.
  19 + */
  20 +
  21 +#define _GNU_SOURCE
  22 +
  23 +#include <stdlib.h>
  24 +#include <fcntl.h>
  25 +#include <getopt.h>
  26 +#include <assert.h>
  27 +#include <unistd.h>
  28 +#include <stdbool.h>
  29 +#include <signal.h>
  30 +#include <poll.h>
  31 +#include <features.h>
  32 +#include <sys/types.h>
  33 +#include <sys/stat.h>
  34 +#include <sys/mman.h>
  35 +#include <sys/socket.h>
  36 +#include <linux/virtio_ring.h>
  37 +#include <linux/virtio_net.h>
  38 +#include <linux/virtio_console.h>
  39 +#include <linux/virtio_blk.h>
  40 +#include <linux/version.h>
  41 +#include "mpssd.h"
  42 +#include <linux/mic_ioctl.h>
  43 +#include <linux/mic_common.h>
  44 +
  45 +static void init_mic(struct mic_info *mic);
  46 +
  47 +static FILE *logfp;
  48 +static struct mic_info mic_list;
  49 +
  50 +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  51 +
  52 +#define min_t(type, x, y) ({ \
  53 + type __min1 = (x); \
  54 + type __min2 = (y); \
  55 + __min1 < __min2 ? __min1 : __min2; })
  56 +
  57 +/* align addr on a size boundary - adjust address up/down if needed */
  58 +#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
  59 +#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size)
  60 +
  61 +/* align addr on a size boundary - adjust address up if needed */
  62 +#define _ALIGN(addr, size) _ALIGN_UP(addr, size)
  63 +
  64 +/* to align the pointer to the (next) page boundary */
  65 +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
  66 +
  67 +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  68 +
  69 +#define GSO_ENABLED 1
  70 +#define MAX_GSO_SIZE (64 * 1024)
  71 +#define ETH_H_LEN 14
  72 +#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
  73 +#define MIC_DEVICE_PAGE_END 0x1000
  74 +
  75 +#ifndef VIRTIO_NET_HDR_F_DATA_VALID
  76 +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
  77 +#endif
  78 +
  79 +static struct { /* console: descriptor, vring configs, feature words, config */
  80 + struct mic_device_desc dd;
  81 + struct mic_vqconfig vqconfig[2];
  82 + __u32 host_features, guest_acknowledgements;
  83 + struct virtio_console_config cons_config;
  84 +} virtcons_dev_page = {
  85 + .dd = {
  86 + .type = VIRTIO_ID_CONSOLE,
  87 + .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), /* follows the array size */
  88 + .feature_len = sizeof(virtcons_dev_page.host_features),
  89 + .config_len = sizeof(virtcons_dev_page.cons_config),
  90 + },
  91 + .vqconfig[0] = {
  92 + .num = htole16(MIC_VRING_ENTRIES), /* stored little-endian */
  93 + },
  94 + .vqconfig[1] = {
  95 + .num = htole16(MIC_VRING_ENTRIES), /* stored little-endian */
  96 + },
  97 +}; /* host_features is left zero for the console */
  98 +
  99 +static struct { /* net: descriptor, vring configs, feature words, config */
  100 + struct mic_device_desc dd;
  101 + struct mic_vqconfig vqconfig[2];
  102 + __u32 host_features, guest_acknowledgements;
  103 + struct virtio_net_config net_config;
  104 +} virtnet_dev_page = {
  105 + .dd = {
  106 + .type = VIRTIO_ID_NET,
  107 + .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), /* follows the array size */
  108 + .feature_len = sizeof(virtnet_dev_page.host_features),
  109 + .config_len = sizeof(virtnet_dev_page.net_config),
  110 + },
  111 + .vqconfig[0] = {
  112 + .num = htole16(MIC_VRING_ENTRIES), /* stored little-endian */
  113 + },
  114 + .vqconfig[1] = {
  115 + .num = htole16(MIC_VRING_ENTRIES), /* stored little-endian */
  116 + },
  117 +#if GSO_ENABLED /* advertise checksum and segmentation offload feature bits */
  118 + .host_features = htole32(
  119 + 1 << VIRTIO_NET_F_CSUM |
  120 + 1 << VIRTIO_NET_F_GSO |
  121 + 1 << VIRTIO_NET_F_GUEST_TSO4 |
  122 + 1 << VIRTIO_NET_F_GUEST_TSO6 |
  123 + 1 << VIRTIO_NET_F_GUEST_ECN |
  124 + 1 << VIRTIO_NET_F_GUEST_UFO),
  125 +#else /* no feature bits offered when GSO is compiled out */
  126 + .host_features = 0,
  127 +#endif
  128 +};
  129 +
  130 +static const char *mic_config_dir = "/etc/sysconfig/mic";
  131 +static const char *virtblk_backend = "VIRTBLK_BACKEND";
  132 +static struct { /* block: descriptor, one vring config, feature words, config */
  133 + struct mic_device_desc dd;
  134 + struct mic_vqconfig vqconfig[1];
  135 + __u32 host_features, guest_acknowledgements;
  136 + struct virtio_blk_config blk_config;
  137 +} virtblk_dev_page = {
  138 + .dd = {
  139 + .type = VIRTIO_ID_BLOCK,
  140 + .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), /* follows the array size */
  141 + .feature_len = sizeof(virtblk_dev_page.host_features),
  142 + .config_len = sizeof(virtblk_dev_page.blk_config),
  143 + },
  144 + .vqconfig[0] = {
  145 + .num = htole16(MIC_VRING_ENTRIES), /* stored little-endian */
  146 + },
  147 + .host_features =
  148 + htole32(1<<VIRTIO_BLK_F_SEG_MAX), /* seg_max below is meaningful */
  149 + .blk_config = {
  150 + .seg_max = htole32(MIC_VRING_ENTRIES - 2),
  151 + .capacity = htole64(0), /* NOTE(review): presumably set later - confirm */
  152 + }
  153 +};
  154 +
  155 +static char *myname;
  156 +
  157 +static int
  158 +tap_configure(struct mic_info *mic, char *dev)
  159 +{
  160 + pid_t pid;
  161 + char *ifargv[7];
  162 + char ipaddr[IFNAMSIZ];
  163 + int ret = 0;
  164 +
  165 + pid = fork();
  166 + if (pid == 0) {
  167 + ifargv[0] = "ip";
  168 + ifargv[1] = "link";
  169 + ifargv[2] = "set";
  170 + ifargv[3] = dev;
  171 + ifargv[4] = "up";
  172 + ifargv[5] = NULL;
  173 + mpsslog("Configuring %s\n", dev);
  174 + ret = execvp("ip", ifargv);
  175 + if (ret < 0) {
  176 + mpsslog("%s execvp failed errno %s\n",
  177 + mic->name, strerror(errno));
  178 + return ret;
  179 + }
  180 + }
  181 + if (pid < 0) {
  182 + mpsslog("%s fork failed errno %s\n",
  183 + mic->name, strerror(errno));
  184 + return ret;
  185 + }
  186 +
  187 + ret = waitpid(pid, NULL, 0);
  188 + if (ret < 0) {
  189 + mpsslog("%s waitpid failed errno %s\n",
  190 + mic->name, strerror(errno));
  191 + return ret;
  192 + }
  193 +
  194 + snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
  195 +
  196 + pid = fork();
  197 + if (pid == 0) {
  198 + ifargv[0] = "ip";
  199 + ifargv[1] = "addr";
  200 + ifargv[2] = "add";
  201 + ifargv[3] = ipaddr;
  202 + ifargv[4] = "dev";
  203 + ifargv[5] = dev;
  204 + ifargv[6] = NULL;
  205 + mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
  206 + ret = execvp("ip", ifargv);
  207 + if (ret < 0) {
  208 + mpsslog("%s execvp failed errno %s\n",
  209 + mic->name, strerror(errno));
  210 + return ret;
  211 + }
  212 + }
  213 + if (pid < 0) {
  214 + mpsslog("%s fork failed errno %s\n",
  215 + mic->name, strerror(errno));
  216 + return ret;
  217 + }
  218 +
  219 + ret = waitpid(pid, NULL, 0);
  220 + if (ret < 0) {
  221 + mpsslog("%s waitpid failed errno %s\n",
  222 + mic->name, strerror(errno));
  223 + return ret;
  224 + }
  225 + mpsslog("MIC name %s %s %d DONE!\n",
  226 + mic->name, __func__, __LINE__);
  227 + return 0;
  228 +}
  229 +
  230 +static int tun_alloc(struct mic_info *mic, char *dev)
  231 +{
  232 + struct ifreq ifr;
  233 + int fd, err;
  234 +#if GSO_ENABLED
  235 + unsigned offload;
  236 +#endif
  237 + fd = open("/dev/net/tun", O_RDWR);
  238 + if (fd < 0) {
  239 + mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
  240 + goto done;
  241 + }
  242 +
  243 + memset(&ifr, 0, sizeof(ifr));
  244 +
  245 + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
  246 + if (*dev)
  247 + strncpy(ifr.ifr_name, dev, IFNAMSIZ);
  248 +
  249 + err = ioctl(fd, TUNSETIFF, (void *) &ifr);
  250 + if (err < 0) {
  251 + mpsslog("%s %s %d TUNSETIFF failed %s\n",
  252 + mic->name, __func__, __LINE__, strerror(errno));
  253 + close(fd);
  254 + return err;
  255 + }
  256 +#if GSO_ENABLED
  257 + offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
  258 + TUN_F_TSO_ECN | TUN_F_UFO;
  259 +
  260 + err = ioctl(fd, TUNSETOFFLOAD, offload);
  261 + if (err < 0) {
  262 + mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
  263 + mic->name, __func__, __LINE__, strerror(errno));
  264 + close(fd);
  265 + return err;
  266 + }
  267 +#endif
  268 + strcpy(dev, ifr.ifr_name);
  269 + mpsslog("Created TAP %s\n", dev);
  270 +done:
  271 + return fd;
  272 +}
  273 +
  274 +#define NET_FD_VIRTIO_NET 0
  275 +#define NET_FD_TUN 1
  276 +#define MAX_NET_FD 2
  277 +
  278 +static void set_dp(struct mic_info *mic, int type, void *dp)
  279 +{
  280 + switch (type) {
  281 + case VIRTIO_ID_CONSOLE:
  282 + mic->mic_console.console_dp = dp;
  283 + return;
  284 + case VIRTIO_ID_NET:
  285 + mic->mic_net.net_dp = dp;
  286 + return;
  287 + case VIRTIO_ID_BLOCK:
  288 + mic->mic_virtblk.block_dp = dp;
  289 + return;
  290 + }
  291 + mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  292 + assert(0);
  293 +}
  294 +
  295 +static void *get_dp(struct mic_info *mic, int type)
  296 +{
  297 + switch (type) {
  298 + case VIRTIO_ID_CONSOLE:
  299 + return mic->mic_console.console_dp;
  300 + case VIRTIO_ID_NET:
  301 + return mic->mic_net.net_dp;
  302 + case VIRTIO_ID_BLOCK:
  303 + return mic->mic_virtblk.block_dp;
  304 + }
  305 + mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  306 + assert(0);
  307 + return NULL;
  308 +}
  309 +
  310 +static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
  311 +{
  312 + struct mic_device_desc *d;
  313 + int i;
  314 + void *dp = get_dp(mic, type);
  315 +
  316 + for (i = mic_aligned_size(struct mic_bootparam); i < PAGE_SIZE;
  317 + i += mic_total_desc_size(d)) {
  318 + d = dp + i;
  319 +
  320 + /* End of list */
  321 + if (d->type == 0)
  322 + break;
  323 +
  324 + if (d->type == -1)
  325 + continue;
  326 +
  327 + mpsslog("%s %s d-> type %d d %p\n",
  328 + mic->name, __func__, d->type, d);
  329 +
  330 + if (d->type == (__u8)type)
  331 + return d;
  332 + }
  333 + mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  334 + assert(0);
  335 + return NULL;
  336 +}
  337 +
  338 +/* See comments in vhost.c for explanation of next_desc() */
  339 +static unsigned next_desc(struct vring_desc *desc)
  340 +{
  341 + unsigned int next;
  342 +
  343 + if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
  344 + return -1U;
  345 + next = le16toh(desc->next);
  346 + return next;
  347 +}
  348 +
  349 +/* Sum up all the IOVEC length */
  350 +static ssize_t
  351 +sum_iovec_len(struct mic_copy_desc *copy)
  352 +{
  353 + ssize_t sum = 0;
  354 + int i;
  355 +
  356 + for (i = 0; i < copy->iovcnt; i++)
  357 + sum += copy->iov[i].iov_len;
  358 + return sum;
  359 +}
  360 +
  361 +static inline void verify_out_len(struct mic_info *mic,
  362 + struct mic_copy_desc *copy)
  363 +{
  364 + if (copy->out_len != sum_iovec_len(copy)) {
  365 + mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%x\n",
  366 + mic->name, __func__, __LINE__,
  367 + copy->out_len, sum_iovec_len(copy));
  368 + assert(copy->out_len == sum_iovec_len(copy));
  369 + }
  370 +}
  371 +
  372 +/* Display an iovec */
  373 +static void
  374 +disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
  375 + const char *s, int line)
  376 +{
  377 + int i;
  378 +
  379 + for (i = 0; i < copy->iovcnt; i++)
  380 + mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%lx\n",
  381 + mic->name, s, line, i,
  382 + copy->iov[i].iov_base, copy->iov[i].iov_len);
  383 +}
  384 +
  385 +static inline __u16 read_avail_idx(struct mic_vring *vr)
  386 +{
  387 + return ACCESS_ONCE(vr->info->avail_idx); /* volatile read of the info block's index */
  388 +}
  389 +
  390 +static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
  391 + struct mic_copy_desc *copy, ssize_t len)
  392 +{
  393 + copy->vr_idx = tx ? 0 : 1;
  394 + copy->update_used = true;
  395 + if (type == VIRTIO_ID_NET)
  396 + copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
  397 + else
  398 + copy->iov[0].iov_len = len;
  399 +}
  400 +
  401 +/* Central API which triggers the copies */
  402 +static int
  403 +mic_virtio_copy(struct mic_info *mic, int fd,
  404 + struct mic_vring *vr, struct mic_copy_desc *copy)
  405 +{
  406 + int ret;
  407 +
  408 + ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
  409 + if (ret) {
  410 + mpsslog("%s %s %d errno %s ret %d\n",
  411 + mic->name, __func__, __LINE__,
  412 + strerror(errno), ret);
  413 + }
  414 + return ret;
  415 +}
  416 +
  417 +/*
  418 + * Map the device page and vrings of fd and set up vr0 (mandatory) and vr1
  419 + * (optional, may be NULL). Returns the mapping, or MAP_FAILED if mmap fails.
  420 + */
  421 +static void *
  422 +init_vr(struct mic_info *mic, int fd, int type,
  423 + struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
  424 +{
  425 + int vr_size;
  426 + char *va;
  427 +
  428 + vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, /* ring + info block, page aligned */
  429 + MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
  430 + va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, /* device page, then num_vq vring areas */
  431 + PROT_READ, MAP_SHARED, fd, 0);
  432 + if (MAP_FAILED == va) {
  433 + mpsslog("%s %s %d mmap failed errno %s\n",
  434 + mic->name, __func__, __LINE__,
  435 + strerror(errno));
  436 + goto done;
  437 + }
  438 + set_dp(mic, type, va); /* remember the mapping for get_dp() */
  439 + vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; /* first vring follows the device page */
  440 + vr0->info = vr0->va + /* info block sits vring_size() past the ring start */
  441 + vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
  442 + vring_init(&vr0->vr,
  443 + MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
  444 + mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
  445 + __func__, mic->name, vr0->va, vr0->info, vr_size,
  446 + vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
  447 + mpsslog("magic 0x%x expected 0x%x\n",
  448 + vr0->info->magic, MIC_MAGIC + type);
  449 + assert(vr0->info->magic == MIC_MAGIC + type); /* vr0 must carry MIC_MAGIC + type */
  450 + if (vr1) {
  451 + vr1->va = (struct mic_vring *) /* second vring starts vr_size after the first */
  452 + &va[MIC_DEVICE_PAGE_END + vr_size];
  453 + vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
  454 + MIC_VIRTIO_RING_ALIGN);
  455 + vring_init(&vr1->vr,
  456 + MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
  457 + mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
  458 + __func__, mic->name, vr1->va, vr1->info, vr_size,
  459 + vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
  460 + mpsslog("magic 0x%x expected 0x%x\n",
  461 + vr1->info->magic, MIC_MAGIC + type + 1);
  462 + assert(vr1->info->magic == MIC_MAGIC + type + 1); /* vr1 must carry MIC_MAGIC + type + 1 */
  463 + }
  464 +done:
  465 + return va; /* MAP_FAILED when the mmap above failed */
  466 +}
  467 +
  468 +static void
  469 +wait_for_card_driver(struct mic_info *mic, int fd, int type)
  470 +{
  471 + struct pollfd pollfd;
  472 + int err;
  473 + struct mic_device_desc *desc = get_device_desc(mic, type);
  474 +
  475 + pollfd.fd = fd;
  476 + mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
  477 + mic->name, __func__, type, desc->status);
  478 + while (1) {
  479 + pollfd.events = POLLIN;
  480 + pollfd.revents = 0;
  481 + err = poll(&pollfd, 1, -1);
  482 + if (err < 0) {
  483 + mpsslog("%s %s poll failed %s\n",
  484 + mic->name, __func__, strerror(errno));
  485 + continue;
  486 + }
  487 +
  488 + if (pollfd.revents) {
  489 + mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
  490 + mic->name, __func__, type, desc->status);
  491 + if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
  492 + mpsslog("%s %s poll.revents %d\n",
  493 + mic->name, __func__, pollfd.revents);
  494 + mpsslog("%s %s desc-> type %d status 0x%x\n",
  495 + mic->name, __func__, type,
  496 + desc->status);
  497 + break;
  498 + }
  499 + }
  500 + }
  501 +}
  502 +
  503 +/* Spin till we have some descriptors */
  504 +static void
  505 +spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
  506 +{
  507 + __u16 avail_idx = read_avail_idx(vr);
  508 +
  509 + while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
  510 +#ifdef DEBUG
  511 + mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
  512 + mic->name, __func__,
  513 + le16toh(vr->vr.avail->idx), vr->info->avail_idx);
  514 +#endif
  515 + sched_yield();
  516 + }
  517 +}
  518 +
  519 +static void *
  520 +virtio_net(void *arg)
  521 +{
  522 + static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
  523 + static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __aligned(64);
  524 + struct iovec vnet_iov[2][2] = {
  525 + { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
  526 + { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
  527 + { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
  528 + { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
  529 + };
  530 + struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
  531 + struct mic_info *mic = (struct mic_info *)arg;
  532 + char if_name[IFNAMSIZ];
  533 + struct pollfd net_poll[MAX_NET_FD];
  534 + struct mic_vring tx_vr, rx_vr;
  535 + struct mic_copy_desc copy;
  536 + struct mic_device_desc *desc;
  537 + int err;
  538 +
  539 + snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
  540 + mic->mic_net.tap_fd = tun_alloc(mic, if_name);
  541 + if (mic->mic_net.tap_fd < 0)
  542 + goto done;
  543 +
  544 + if (tap_configure(mic, if_name))
  545 + goto done;
  546 + mpsslog("MIC name %s id %d\n", mic->name, mic->id);
  547 +
  548 + net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
  549 + net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
  550 + net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
  551 + net_poll[NET_FD_TUN].events = POLLIN;
  552 +
  553 + if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
  554 + VIRTIO_ID_NET, &tx_vr, &rx_vr,
  555 + virtnet_dev_page.dd.num_vq)) {
  556 + mpsslog("%s init_vr failed %s\n",
  557 + mic->name, strerror(errno));
  558 + goto done;
  559 + }
  560 +
  561 + copy.iovcnt = 2;
  562 + desc = get_device_desc(mic, VIRTIO_ID_NET);
  563 +
  564 + while (1) {
  565 + ssize_t len;
  566 +
  567 + net_poll[NET_FD_VIRTIO_NET].revents = 0;
  568 + net_poll[NET_FD_TUN].revents = 0;
  569 +
  570 + /* Start polling for data from tap and virtio net */
  571 + err = poll(net_poll, 2, -1);
  572 + if (err < 0) {
  573 + mpsslog("%s poll failed %s\n",
  574 + __func__, strerror(errno));
  575 + continue;
  576 + }
  577 + if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
  578 + wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
  579 + VIRTIO_ID_NET);
  580 + /*
  581 + * Check if there is data to be read from TUN and write to
  582 + * virtio net fd if there is.
  583 + */
  584 + if (net_poll[NET_FD_TUN].revents & POLLIN) {
  585 + copy.iov = iov0;
  586 + len = readv(net_poll[NET_FD_TUN].fd,
  587 + copy.iov, copy.iovcnt);
  588 + if (len > 0) {
  589 + struct virtio_net_hdr *hdr
  590 + = (struct virtio_net_hdr *) vnet_hdr[0];
  591 +
  592 + /* Disable checksums on the card since we are on
  593 + a reliable PCIe link */
  594 + hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
  595 +#ifdef DEBUG
  596 + mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
  597 + __func__, __LINE__, hdr->flags);
  598 + mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
  599 + copy.out_len, hdr->gso_type);
  600 +#endif
  601 +#ifdef DEBUG
  602 + disp_iovec(mic, copy, __func__, __LINE__);
  603 + mpsslog("%s %s %d read from tap 0x%lx\n",
  604 + mic->name, __func__, __LINE__,
  605 + len);
  606 +#endif
  607 + spin_for_descriptors(mic, &tx_vr);
  608 + txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
  609 + len);
  610 +
  611 + err = mic_virtio_copy(mic,
  612 + mic->mic_net.virtio_net_fd, &tx_vr,
  613 + &copy);
  614 + if (err < 0) {
  615 + mpsslog("%s %s %d mic_virtio_copy %s\n",
  616 + mic->name, __func__, __LINE__,
  617 + strerror(errno));
  618 + }
  619 + if (!err)
  620 + verify_out_len(mic, &copy);
  621 +#ifdef DEBUG
  622 + disp_iovec(mic, copy, __func__, __LINE__);
  623 + mpsslog("%s %s %d wrote to net 0x%lx\n",
  624 + mic->name, __func__, __LINE__,
  625 + sum_iovec_len(&copy));
  626 +#endif
  627 + /* Reinitialize IOV for next run */
  628 + iov0[1].iov_len = MAX_NET_PKT_SIZE;
  629 + } else if (len < 0) {
  630 + disp_iovec(mic, &copy, __func__, __LINE__);
  631 + mpsslog("%s %s %d read failed %s ", mic->name,
  632 + __func__, __LINE__, strerror(errno));
  633 + mpsslog("cnt %d sum %d\n",
  634 + copy.iovcnt, sum_iovec_len(&copy));
  635 + }
  636 + }
  637 +
  638 + /*
  639 + * Check if there is data to be read from virtio net and
  640 + * write to TUN if there is.
  641 + */
  642 + if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
  643 + while (rx_vr.info->avail_idx !=
  644 + le16toh(rx_vr.vr.avail->idx)) {
  645 + copy.iov = iov1;
  646 + txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
  647 + MAX_NET_PKT_SIZE
  648 + + sizeof(struct virtio_net_hdr));
  649 +
  650 + err = mic_virtio_copy(mic,
  651 + mic->mic_net.virtio_net_fd, &rx_vr,
  652 + &copy);
  653 + if (!err) {
  654 +#ifdef DEBUG
  655 + struct virtio_net_hdr *hdr
  656 + = (struct virtio_net_hdr *)
  657 + vnet_hdr[1];
  658 +
  659 + mpsslog("%s %s %d hdr->flags 0x%x, ",
  660 + mic->name, __func__, __LINE__,
  661 + hdr->flags);
  662 + mpsslog("out_len %d gso_type 0x%x\n",
  663 + copy.out_len,
  664 + hdr->gso_type);
  665 +#endif
  666 + /* Set the correct output iov_len */
  667 + iov1[1].iov_len = copy.out_len -
  668 + sizeof(struct virtio_net_hdr);
  669 + verify_out_len(mic, &copy);
  670 +#ifdef DEBUG
  671 + disp_iovec(mic, copy, __func__,
  672 + __LINE__);
  673 + mpsslog("%s %s %d ",
  674 + mic->name, __func__, __LINE__);
  675 + mpsslog("read from net 0x%lx\n",
  676 + sum_iovec_len(copy));
  677 +#endif
  678 + len = writev(net_poll[NET_FD_TUN].fd,
  679 + copy.iov, copy.iovcnt);
  680 + if (len != sum_iovec_len(&copy)) {
  681 + mpsslog("Tun write failed %s ",
  682 + strerror(errno));
  683 + mpsslog("len 0x%x ", len);
  684 + mpsslog("read_len 0x%x\n",
  685 + sum_iovec_len(&copy));
  686 + } else {
  687 +#ifdef DEBUG
  688 + disp_iovec(mic, &copy, __func__,
  689 + __LINE__);
  690 + mpsslog("%s %s %d ",
  691 + mic->name, __func__,
  692 + __LINE__);
  693 + mpsslog("wrote to tap 0x%lx\n",
  694 + len);
  695 +#endif
  696 + }
  697 + } else {
  698 + mpsslog("%s %s %d mic_virtio_copy %s\n",
  699 + mic->name, __func__, __LINE__,
  700 + strerror(errno));
  701 + break;
  702 + }
  703 + }
  704 + }
  705 + if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
  706 + mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
  707 + }
  708 +done:
  709 + pthread_exit(NULL);
  710 +}
  711 +
  712 +/* virtio_console */
  713 +#define VIRTIO_CONSOLE_FD 0
  714 +#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
  715 +#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
  716 +#define MAX_BUFFER_SIZE PAGE_SIZE
  717 +
  718 +static void *
  719 +virtio_console(void *arg)
  720 +{
  721 + static __u8 vcons_buf[2][PAGE_SIZE];
  722 + struct iovec vcons_iov[2] = {
  723 + { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
  724 + { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
  725 + };
  726 + struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
  727 + struct mic_info *mic = (struct mic_info *)arg;
  728 + int err;
  729 + struct pollfd console_poll[MAX_CONSOLE_FD];
  730 + int pty_fd;
  731 + char *pts_name;
  732 + ssize_t len;
  733 + struct mic_vring tx_vr, rx_vr;
  734 + struct mic_copy_desc copy;
  735 + struct mic_device_desc *desc;
  736 +
  737 + pty_fd = posix_openpt(O_RDWR);
  738 + if (pty_fd < 0) {
  739 + mpsslog("can't open a pseudoterminal master device: %s\n",
  740 + strerror(errno));
  741 + goto _return;
  742 + }
  743 + pts_name = ptsname(pty_fd);
  744 + if (pts_name == NULL) {
  745 + mpsslog("can't get pts name\n");
  746 + goto _close_pty;
  747 + }
  748 + printf("%s console message goes to %s\n", mic->name, pts_name);
  749 + mpsslog("%s console message goes to %s\n", mic->name, pts_name);
  750 + err = grantpt(pty_fd);
  751 + if (err < 0) {
  752 + mpsslog("can't grant access: %s %s\n",
  753 + pts_name, strerror(errno));
  754 + goto _close_pty;
  755 + }
  756 + err = unlockpt(pty_fd);
  757 + if (err < 0) {
  758 + mpsslog("can't unlock a pseudoterminal: %s %s\n",
  759 + pts_name, strerror(errno));
  760 + goto _close_pty;
  761 + }
  762 + console_poll[MONITOR_FD].fd = pty_fd;
  763 + console_poll[MONITOR_FD].events = POLLIN;
  764 +
  765 + console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
  766 + console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
  767 +
  768 + if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
  769 + VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
  770 + virtcons_dev_page.dd.num_vq)) {
  771 + mpsslog("%s init_vr failed %s\n",
  772 + mic->name, strerror(errno));
  773 + goto _close_pty;
  774 + }
  775 +
  776 + copy.iovcnt = 1;
  777 + desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
  778 +
  779 + for (;;) {
  780 + console_poll[MONITOR_FD].revents = 0;
  781 + console_poll[VIRTIO_CONSOLE_FD].revents = 0;
  782 + err = poll(console_poll, MAX_CONSOLE_FD, -1);
  783 + if (err < 0) {
  784 + mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
  785 + strerror(errno));
  786 + continue;
  787 + }
  788 + if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
  789 + wait_for_card_driver(mic,
  790 + mic->mic_console.virtio_console_fd,
  791 + VIRTIO_ID_CONSOLE);
  792 +
  793 + if (console_poll[MONITOR_FD].revents & POLLIN) {
  794 + copy.iov = iov0;
  795 + len = readv(pty_fd, copy.iov, copy.iovcnt);
  796 + if (len > 0) {
  797 +#ifdef DEBUG
  798 + disp_iovec(mic, copy, __func__, __LINE__);
  799 + mpsslog("%s %s %d read from tap 0x%lx\n",
  800 + mic->name, __func__, __LINE__,
  801 + len);
  802 +#endif
  803 + spin_for_descriptors(mic, &tx_vr);
  804 + txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
  805 + &copy, len);
  806 +
  807 + err = mic_virtio_copy(mic,
  808 + mic->mic_console.virtio_console_fd,
  809 + &tx_vr, &copy);
  810 + if (err < 0) {
  811 + mpsslog("%s %s %d mic_virtio_copy %s\n",
  812 + mic->name, __func__, __LINE__,
  813 + strerror(errno));
  814 + }
  815 + if (!err)
  816 + verify_out_len(mic, &copy);
  817 +#ifdef DEBUG
  818 + disp_iovec(mic, copy, __func__, __LINE__);
  819 + mpsslog("%s %s %d wrote to net 0x%lx\n",
  820 + mic->name, __func__, __LINE__,
  821 + sum_iovec_len(copy));
  822 +#endif
  823 + /* Reinitialize IOV for next run */
  824 + iov0->iov_len = PAGE_SIZE;
  825 + } else if (len < 0) {
  826 + disp_iovec(mic, &copy, __func__, __LINE__);
  827 + mpsslog("%s %s %d read failed %s ",
  828 + mic->name, __func__, __LINE__,
  829 + strerror(errno));
  830 + mpsslog("cnt %d sum %d\n",
  831 + copy.iovcnt, sum_iovec_len(&copy));
  832 + }
  833 + }
  834 +
  835 + if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
  836 + while (rx_vr.info->avail_idx !=
  837 + le16toh(rx_vr.vr.avail->idx)) {
  838 + copy.iov = iov1;
  839 + txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
  840 + &copy, PAGE_SIZE);
  841 +
  842 + err = mic_virtio_copy(mic,
  843 + mic->mic_console.virtio_console_fd,
  844 + &rx_vr, &copy);
  845 + if (!err) {
  846 + /* Set the correct output iov_len */
  847 + iov1->iov_len = copy.out_len;
  848 + verify_out_len(mic, &copy);
  849 +#ifdef DEBUG
  850 + disp_iovec(mic, copy, __func__,
  851 + __LINE__);
  852 + mpsslog("%s %s %d ",
  853 + mic->name, __func__, __LINE__);
  854 + mpsslog("read from net 0x%lx\n",
  855 + sum_iovec_len(copy));
  856 +#endif
  857 + len = writev(pty_fd,
  858 + copy.iov, copy.iovcnt);
  859 + if (len != sum_iovec_len(&copy)) {
  860 + mpsslog("Tun write failed %s ",
  861 + strerror(errno));
  862 + mpsslog("len 0x%x ", len);
  863 + mpsslog("read_len 0x%x\n",
  864 + sum_iovec_len(&copy));
  865 + } else {
  866 +#ifdef DEBUG
  867 + disp_iovec(mic, copy, __func__,
  868 + __LINE__);
  869 + mpsslog("%s %s %d ",
  870 + mic->name, __func__,
  871 + __LINE__);
  872 + mpsslog("wrote to tap 0x%lx\n",
  873 + len);
  874 +#endif
  875 + }
  876 + } else {
  877 + mpsslog("%s %s %d mic_virtio_copy %s\n",
  878 + mic->name, __func__, __LINE__,
  879 + strerror(errno));
  880 + break;
  881 + }
  882 + }
  883 + }
  884 + if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
  885 + mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
  886 + }
  887 +_close_pty:
  888 + close(pty_fd);
  889 +_return:
  890 + pthread_exit(NULL);
  891 +}
  892 +
  893 +static void
  894 +add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
  895 +{
  896 + char path[PATH_MAX];
  897 + int fd, err;
  898 +
  899 + snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
  900 + fd = open(path, O_RDWR);
  901 + if (fd < 0) {
  902 + mpsslog("Could not open %s %s\n", path, strerror(errno));
  903 + return;
  904 + }
  905 +
  906 + err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
  907 + if (err < 0) {
  908 + mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
  909 + close(fd);
  910 + return;
  911 + }
  912 + switch (dd->type) {
  913 + case VIRTIO_ID_NET:
  914 + mic->mic_net.virtio_net_fd = fd;
  915 + mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
  916 + break;
  917 + case VIRTIO_ID_CONSOLE:
  918 + mic->mic_console.virtio_console_fd = fd;
  919 + mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
  920 + break;
  921 + case VIRTIO_ID_BLOCK:
  922 + mic->mic_virtblk.virtio_block_fd = fd;
  923 + mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
  924 + break;
  925 + }
  926 +}
  927 +
  928 +static bool
  929 +set_backend_file(struct mic_info *mic)
  930 +{
  931 + FILE *config;
  932 + char buff[PATH_MAX], *line, *evv, *p;
  933 +
  934 + snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
  935 + config = fopen(buff, "r");
  936 + if (config == NULL)
  937 + return false;
  938 + do { /* look for "virtblk_backend=XXXX" */
  939 + line = fgets(buff, PATH_MAX, config);
  940 + if (line == NULL)
  941 + break;
  942 + if (*line == '#')
  943 + continue;
  944 + p = strchr(line, '\n');
  945 + if (p)
  946 + *p = '\0';
  947 + } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
  948 + fclose(config);
  949 + if (line == NULL)
  950 + return false;
  951 + evv = strchr(line, '=');
  952 + if (evv == NULL)
  953 + return false;
  954 + mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
  955 + if (mic->mic_virtblk.backend_file == NULL) {
  956 + mpsslog("can't allocate memory\n", mic->name, mic->id);
  957 + return false;
  958 + }
  959 + strcpy(mic->mic_virtblk.backend_file, evv + 1);
  960 + return true;
  961 +}
  962 +
  963 +#define SECTOR_SIZE 512
  964 +static bool
  965 +set_backend_size(struct mic_info *mic)
  966 +{
  967 + mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
  968 + SEEK_END);
  969 + if (mic->mic_virtblk.backend_size < 0) {
  970 + mpsslog("%s: can't seek: %s\n",
  971 + mic->name, mic->mic_virtblk.backend_file);
  972 + return false;
  973 + }
  974 + virtblk_dev_page.blk_config.capacity =
  975 + mic->mic_virtblk.backend_size / SECTOR_SIZE;
  976 + if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
  977 + virtblk_dev_page.blk_config.capacity++;
  978 +
  979 + virtblk_dev_page.blk_config.capacity =
  980 + htole64(virtblk_dev_page.blk_config.capacity);
  981 +
  982 + return true;
  983 +}
  984 +
  985 +static bool
  986 +open_backend(struct mic_info *mic)
  987 +{
  988 + if (!set_backend_file(mic))
  989 + goto _error_exit;
  990 + mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
  991 + if (mic->mic_virtblk.backend < 0) {
  992 + mpsslog("%s: can't open: %s\n", mic->name,
  993 + mic->mic_virtblk.backend_file);
  994 + goto _error_free;
  995 + }
  996 + if (!set_backend_size(mic))
  997 + goto _error_close;
  998 + mic->mic_virtblk.backend_addr = mmap(NULL,
  999 + mic->mic_virtblk.backend_size,
  1000 + PROT_READ|PROT_WRITE, MAP_SHARED,
  1001 + mic->mic_virtblk.backend, 0L);
  1002 + if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
  1003 + mpsslog("%s: can't map: %s %s\n",
  1004 + mic->name, mic->mic_virtblk.backend_file,
  1005 + strerror(errno));
  1006 + goto _error_close;
  1007 + }
  1008 + return true;
  1009 +
  1010 + _error_close:
  1011 + close(mic->mic_virtblk.backend);
  1012 + _error_free:
  1013 + free(mic->mic_virtblk.backend_file);
  1014 + _error_exit:
  1015 + return false;
  1016 +}
  1017 +
  1018 +static void
  1019 +close_backend(struct mic_info *mic)
  1020 +{
  1021 + munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
  1022 + close(mic->mic_virtblk.backend);
  1023 + free(mic->mic_virtblk.backend_file);
  1024 +}
  1025 +
  1026 +static bool
  1027 +start_virtblk(struct mic_info *mic, struct mic_vring *vring)
  1028 +{
  1029 + if (((__u64)&virtblk_dev_page.blk_config % 8) != 0) {
  1030 + mpsslog("%s: blk_config is not 8 byte aligned.\n",
  1031 + mic->name);
  1032 + return false;
  1033 + }
  1034 + add_virtio_device(mic, &virtblk_dev_page.dd);
  1035 + if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
  1036 + VIRTIO_ID_BLOCK, vring, NULL, virtblk_dev_page.dd.num_vq)) {
  1037 + mpsslog("%s init_vr failed %s\n",
  1038 + mic->name, strerror(errno));
  1039 + return false;
  1040 + }
  1041 + return true;
  1042 +}
  1043 +
  1044 +static void
  1045 +stop_virtblk(struct mic_info *mic)
  1046 +{
  1047 + int vr_size, ret;
  1048 +
  1049 + vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
  1050 + MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
  1051 + ret = munmap(mic->mic_virtblk.block_dp,
  1052 + MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
  1053 + if (ret < 0)
  1054 + mpsslog("%s munmap errno %d\n", mic->name, errno);
  1055 + close(mic->mic_virtblk.virtio_block_fd);
  1056 +}
  1057 +
  1058 +static __u8
  1059 +header_error_check(struct vring_desc *desc)
  1060 +{
  1061 + if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
  1062 + mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
  1063 + __func__, __LINE__);
  1064 + return -EIO;
  1065 + }
  1066 + if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
  1067 + mpsslog("%s() %d: alone\n",
  1068 + __func__, __LINE__);
  1069 + return -EIO;
  1070 + }
  1071 + if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
  1072 + mpsslog("%s() %d: not read\n",
  1073 + __func__, __LINE__);
  1074 + return -EIO;
  1075 + }
  1076 + return 0;
  1077 +}
  1078 +
  1079 +static int
  1080 +read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
  1081 +{
  1082 + struct iovec iovec;
  1083 + struct mic_copy_desc copy;
  1084 +
  1085 + iovec.iov_len = sizeof(*hdr);
  1086 + iovec.iov_base = hdr;
  1087 + copy.iov = &iovec;
  1088 + copy.iovcnt = 1;
  1089 + copy.vr_idx = 0; /* only one vring on virtio_block */
  1090 + copy.update_used = false; /* do not update used index */
  1091 + return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1092 +}
  1093 +
  1094 +static int
  1095 +transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
  1096 +{
  1097 + struct mic_copy_desc copy;
  1098 +
  1099 + copy.iov = iovec;
  1100 + copy.iovcnt = iovcnt;
  1101 + copy.vr_idx = 0; /* only one vring on virtio_block */
  1102 + copy.update_used = false; /* do not update used index */
  1103 + return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1104 +}
  1105 +
  1106 +static __u8
  1107 +status_error_check(struct vring_desc *desc)
  1108 +{
  1109 + if (le32toh(desc->len) != sizeof(__u8)) {
  1110 + mpsslog("%s() %d: length is not sizeof(status)\n",
  1111 + __func__, __LINE__);
  1112 + return -EIO;
  1113 + }
  1114 + return 0;
  1115 +}
  1116 +
  1117 +static int
  1118 +write_status(int fd, __u8 *status)
  1119 +{
  1120 + struct iovec iovec;
  1121 + struct mic_copy_desc copy;
  1122 +
  1123 + iovec.iov_base = status;
  1124 + iovec.iov_len = sizeof(*status);
  1125 + copy.iov = &iovec;
  1126 + copy.iovcnt = 1;
  1127 + copy.vr_idx = 0; /* only one vring on virtio_block */
  1128 + copy.update_used = true; /* Update used index */
  1129 + return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1130 +}
  1131 +
  1132 +static void *
  1133 +virtio_block(void *arg)
  1134 +{
  1135 + struct mic_info *mic = (struct mic_info *) arg;
  1136 + int ret;
  1137 + struct pollfd block_poll;
  1138 + struct mic_vring vring;
  1139 + __u16 avail_idx;
  1140 + __u32 desc_idx;
  1141 + struct vring_desc *desc;
  1142 + struct iovec *iovec, *piov;
  1143 + __u8 status;
  1144 + __u32 buffer_desc_idx;
  1145 + struct virtio_blk_outhdr hdr;
  1146 + void *fos;
  1147 +
  1148 + for (;;) { /* forever */
  1149 + if (!open_backend(mic)) { /* No virtblk */
  1150 + for (mic->mic_virtblk.signaled = 0;
  1151 + !mic->mic_virtblk.signaled;)
  1152 + sleep(1);
  1153 + continue;
  1154 + }
  1155 +
  1156 + /* backend file is specified. */
  1157 + if (!start_virtblk(mic, &vring))
  1158 + goto _close_backend;
  1159 + iovec = malloc(sizeof(*iovec) *
  1160 + le32toh(virtblk_dev_page.blk_config.seg_max));
  1161 + if (!iovec) {
  1162 + mpsslog("%s: can't alloc iovec: %s\n",
  1163 + mic->name, strerror(ENOMEM));
  1164 + goto _stop_virtblk;
  1165 + }
  1166 +
  1167 + block_poll.fd = mic->mic_virtblk.virtio_block_fd;
  1168 + block_poll.events = POLLIN;
  1169 + for (mic->mic_virtblk.signaled = 0;
  1170 + !mic->mic_virtblk.signaled;) {
  1171 + block_poll.revents = 0;
  1172 + /* timeout in 1 sec to see signaled */
  1173 + ret = poll(&block_poll, 1, 1000);
  1174 + if (ret < 0) {
  1175 + mpsslog("%s %d: poll failed: %s\n",
  1176 + __func__, __LINE__,
  1177 + strerror(errno));
  1178 + continue;
  1179 + }
  1180 +
  1181 + if (!(block_poll.revents & POLLIN)) {
  1182 +#ifdef DEBUG
  1183 + mpsslog("%s %d: block_poll.revents=0x%x\n",
  1184 + __func__, __LINE__, block_poll.revents);
  1185 +#endif
  1186 + continue;
  1187 + }
  1188 +
  1189 + /* POLLIN */
  1190 + while (vring.info->avail_idx !=
  1191 + le16toh(vring.vr.avail->idx)) {
  1192 + /* read header element */
  1193 + avail_idx =
  1194 + vring.info->avail_idx &
  1195 + (vring.vr.num - 1);
  1196 + desc_idx = le16toh(
  1197 + vring.vr.avail->ring[avail_idx]);
  1198 + desc = &vring.vr.desc[desc_idx];
  1199 +#ifdef DEBUG
  1200 + mpsslog("%s() %d: avail_idx=%d ",
  1201 + __func__, __LINE__,
  1202 + vring.info->avail_idx);
  1203 + mpsslog("vring.vr.num=%d desc=%p\n",
  1204 + vring.vr.num, desc);
  1205 +#endif
  1206 + status = header_error_check(desc);
  1207 + ret = read_header(
  1208 + mic->mic_virtblk.virtio_block_fd,
  1209 + &hdr, desc_idx);
  1210 + if (ret < 0) {
  1211 + mpsslog("%s() %d %s: ret=%d %s\n",
  1212 + __func__, __LINE__,
  1213 + mic->name, ret,
  1214 + strerror(errno));
  1215 + break;
  1216 + }
  1217 + /* buffer element */
  1218 + piov = iovec;
  1219 + status = 0;
  1220 + fos = mic->mic_virtblk.backend_addr +
  1221 + (hdr.sector * SECTOR_SIZE);
  1222 + buffer_desc_idx = desc_idx =
  1223 + next_desc(desc);
  1224 + for (desc = &vring.vr.desc[buffer_desc_idx];
  1225 + desc->flags & VRING_DESC_F_NEXT;
  1226 + desc_idx = next_desc(desc),
  1227 + desc = &vring.vr.desc[desc_idx]) {
  1228 + piov->iov_len = desc->len;
  1229 + piov->iov_base = fos;
  1230 + piov++;
  1231 + fos += desc->len;
  1232 + }
  1233 + /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
  1234 + if (hdr.type & ~(VIRTIO_BLK_T_OUT |
  1235 + VIRTIO_BLK_T_GET_ID)) {
  1236 + /*
  1237 + VIRTIO_BLK_T_IN - does not do
  1238 + anything. Probably for documenting.
  1239 + VIRTIO_BLK_T_SCSI_CMD - for
  1240 + virtio_scsi.
  1241 + VIRTIO_BLK_T_FLUSH - turned off in
  1242 + config space.
  1243 + VIRTIO_BLK_T_BARRIER - defined but not
  1244 + used in anywhere.
  1245 + */
  1246 + mpsslog("%s() %d: type %x ",
  1247 + __func__, __LINE__,
  1248 + hdr.type);
  1249 + mpsslog("is not supported\n");
  1250 + status = -ENOTSUP;
  1251 +
  1252 + } else {
  1253 + ret = transfer_blocks(
  1254 + mic->mic_virtblk.virtio_block_fd,
  1255 + iovec,
  1256 + piov - iovec);
  1257 + if (ret < 0 &&
  1258 + status != 0)
  1259 + status = ret;
  1260 + }
  1261 + /* write status and update used pointer */
  1262 + if (status != 0)
  1263 + status = status_error_check(desc);
  1264 + ret = write_status(
  1265 + mic->mic_virtblk.virtio_block_fd,
  1266 + &status);
  1267 +#ifdef DEBUG
  1268 + mpsslog("%s() %d: write status=%d on desc=%p\n",
  1269 + __func__, __LINE__,
  1270 + status, desc);
  1271 +#endif
  1272 + }
  1273 + }
  1274 + free(iovec);
  1275 +_stop_virtblk:
  1276 + stop_virtblk(mic);
  1277 +_close_backend:
  1278 + close_backend(mic);
  1279 + } /* forever */
  1280 +
  1281 + pthread_exit(NULL);
  1282 +}
  1283 +
  1284 +static void
  1285 +reset(struct mic_info *mic)
  1286 +{
  1287 +#define RESET_TIMEOUT 120
  1288 + int i = RESET_TIMEOUT;
  1289 + setsysfs(mic->name, "state", "reset");
  1290 + while (i) {
  1291 + char *state;
  1292 + state = readsysfs(mic->name, "state");
  1293 + if (!state)
  1294 + goto retry;
  1295 + mpsslog("%s: %s %d state %s\n",
  1296 + mic->name, __func__, __LINE__, state);
  1297 + if ((!strcmp(state, "offline"))) {
  1298 + free(state);
  1299 + break;
  1300 + }
  1301 + free(state);
  1302 +retry:
  1303 + sleep(1);
  1304 + i--;
  1305 + }
  1306 +}
  1307 +
  1308 +static int
  1309 +get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
  1310 +{
  1311 + if (!strcmp(shutdown_status, "nop"))
  1312 + return MIC_NOP;
  1313 + if (!strcmp(shutdown_status, "crashed"))
  1314 + return MIC_CRASHED;
  1315 + if (!strcmp(shutdown_status, "halted"))
  1316 + return MIC_HALTED;
  1317 + if (!strcmp(shutdown_status, "poweroff"))
  1318 + return MIC_POWER_OFF;
  1319 + if (!strcmp(shutdown_status, "restart"))
  1320 + return MIC_RESTART;
  1321 + mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
  1322 + /* Invalid state */
  1323 + assert(0);
  1324 +};
  1325 +
  1326 +static int get_mic_state(struct mic_info *mic, char *state)
  1327 +{
  1328 + if (!strcmp(state, "offline"))
  1329 + return MIC_OFFLINE;
  1330 + if (!strcmp(state, "online"))
  1331 + return MIC_ONLINE;
  1332 + if (!strcmp(state, "shutting_down"))
  1333 + return MIC_SHUTTING_DOWN;
  1334 + if (!strcmp(state, "reset_failed"))
  1335 + return MIC_RESET_FAILED;
  1336 + mpsslog("%s: BUG invalid state %s\n", mic->name, state);
  1337 + /* Invalid state */
  1338 + assert(0);
  1339 +};
  1340 +
  1341 +static void mic_handle_shutdown(struct mic_info *mic)
  1342 +{
  1343 +#define SHUTDOWN_TIMEOUT 60
  1344 + int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
  1345 + char *shutdown_status;
  1346 + while (i) {
  1347 + shutdown_status = readsysfs(mic->name, "shutdown_status");
  1348 + if (!shutdown_status)
  1349 + continue;
  1350 + mpsslog("%s: %s %d shutdown_status %s\n",
  1351 + mic->name, __func__, __LINE__, shutdown_status);
  1352 + switch (get_mic_shutdown_status(mic, shutdown_status)) {
  1353 + case MIC_RESTART:
  1354 + mic->restart = 1;
  1355 + case MIC_HALTED:
  1356 + case MIC_POWER_OFF:
  1357 + case MIC_CRASHED:
  1358 + free(shutdown_status);
  1359 + goto reset;
  1360 + default:
  1361 + break;
  1362 + }
  1363 + free(shutdown_status);
  1364 + sleep(1);
  1365 + i--;
  1366 + }
  1367 +reset:
  1368 + ret = kill(mic->pid, SIGTERM);
  1369 + mpsslog("%s: %s %d kill pid %d ret %d\n",
  1370 + mic->name, __func__, __LINE__,
  1371 + mic->pid, ret);
  1372 + if (!ret) {
  1373 + ret = waitpid(mic->pid, &stat,
  1374 + WIFSIGNALED(stat));
  1375 + mpsslog("%s: %s %d waitpid ret %d pid %d\n",
  1376 + mic->name, __func__, __LINE__,
  1377 + ret, mic->pid);
  1378 + }
  1379 + if (ret == mic->pid)
  1380 + reset(mic);
  1381 +}
  1382 +
  1383 +static void *
  1384 +mic_config(void *arg)
  1385 +{
  1386 + struct mic_info *mic = (struct mic_info *)arg;
  1387 + char *state = NULL;
  1388 + char pathname[PATH_MAX];
  1389 + int fd, ret;
  1390 + struct pollfd ufds[1];
  1391 + char value[4096];
  1392 +
  1393 + snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
  1394 + MICSYSFSDIR, mic->name, "state");
  1395 +
  1396 + fd = open(pathname, O_RDONLY);
  1397 + if (fd < 0) {
  1398 + mpsslog("%s: opening file %s failed %s\n",
  1399 + mic->name, pathname, strerror(errno));
  1400 + goto error;
  1401 + }
  1402 +
  1403 + do {
  1404 + ret = read(fd, value, sizeof(value));
  1405 + if (ret < 0) {
  1406 + mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
  1407 + mic->name, pathname, strerror(errno));
  1408 + goto close_error1;
  1409 + }
  1410 +retry:
  1411 + state = readsysfs(mic->name, "state");
  1412 + if (!state)
  1413 + goto retry;
  1414 + mpsslog("%s: %s %d state %s\n",
  1415 + mic->name, __func__, __LINE__, state);
  1416 + switch (get_mic_state(mic, state)) {
  1417 + case MIC_SHUTTING_DOWN:
  1418 + mic_handle_shutdown(mic);
  1419 + goto close_error;
  1420 + default:
  1421 + break;
  1422 + }
  1423 + free(state);
  1424 +
  1425 + ufds[0].fd = fd;
  1426 + ufds[0].events = POLLERR | POLLPRI;
  1427 + ret = poll(ufds, 1, -1);
  1428 + if (ret < 0) {
  1429 + mpsslog("%s: poll failed %s\n",
  1430 + mic->name, strerror(errno));
  1431 + goto close_error1;
  1432 + }
  1433 + } while (1);
  1434 +close_error:
  1435 + free(state);
  1436 +close_error1:
  1437 + close(fd);
  1438 +error:
  1439 + init_mic(mic);
  1440 + pthread_exit(NULL);
  1441 +}
  1442 +
  1443 +static void
  1444 +set_cmdline(struct mic_info *mic)
  1445 +{
  1446 + char buffer[PATH_MAX];
  1447 + int len;
  1448 +
  1449 + len = snprintf(buffer, PATH_MAX,
  1450 + "clocksource=tsc highres=off nohz=off ");
  1451 + len += snprintf(buffer + len, PATH_MAX,
  1452 + "cpufreq_on;corec6_off;pc3_off;pc6_off ");
  1453 + len += snprintf(buffer + len, PATH_MAX,
  1454 + "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
  1455 + mic->id);
  1456 +
  1457 + setsysfs(mic->name, "cmdline", buffer);
  1458 + mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
  1459 + snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
  1460 + mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
  1461 +}
  1462 +
  1463 +static void
  1464 +set_log_buf_info(struct mic_info *mic)
  1465 +{
  1466 + int fd;
  1467 + off_t len;
  1468 + char system_map[] = "/lib/firmware/mic/System.map";
  1469 + char *map, *temp, log_buf[17] = {'\0'};
  1470 +
  1471 + fd = open(system_map, O_RDONLY);
  1472 + if (fd < 0) {
  1473 + mpsslog("%s: Opening System.map failed: %d\n",
  1474 + mic->name, errno);
  1475 + return;
  1476 + }
  1477 + len = lseek(fd, 0, SEEK_END);
  1478 + if (len < 0) {
  1479 + mpsslog("%s: Reading System.map size failed: %d\n",
  1480 + mic->name, errno);
  1481 + close(fd);
  1482 + return;
  1483 + }
  1484 + map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
  1485 + if (map == MAP_FAILED) {
  1486 + mpsslog("%s: mmap of System.map failed: %d\n",
  1487 + mic->name, errno);
  1488 + close(fd);
  1489 + return;
  1490 + }
  1491 + temp = strstr(map, "__log_buf");
  1492 + if (!temp) {
  1493 + mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
  1494 + munmap(map, len);
  1495 + close(fd);
  1496 + return;
  1497 + }
  1498 + strncpy(log_buf, temp - 19, 16);
  1499 + setsysfs(mic->name, "log_buf_addr", log_buf);
  1500 + mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
  1501 + temp = strstr(map, "log_buf_len");
  1502 + if (!temp) {
  1503 + mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
  1504 + munmap(map, len);
  1505 + close(fd);
  1506 + return;
  1507 + }
  1508 + strncpy(log_buf, temp - 19, 16);
  1509 + setsysfs(mic->name, "log_buf_len", log_buf);
  1510 + mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
  1511 + munmap(map, len);
  1512 + close(fd);
  1513 +}
  1514 +
  1515 +static void init_mic(struct mic_info *mic);
  1516 +
  1517 +static void
  1518 +change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
  1519 +{
  1520 + struct mic_info *mic;
  1521 +
  1522 + for (mic = mic_list.next; mic != NULL; mic = mic->next)
  1523 + mic->mic_virtblk.signaled = 1/* true */;
  1524 +}
  1525 +
  1526 +static void
  1527 +init_mic(struct mic_info *mic)
  1528 +{
  1529 + struct sigaction ignore = {
  1530 + .sa_flags = 0,
  1531 + .sa_handler = SIG_IGN
  1532 + };
  1533 + struct sigaction act = {
  1534 + .sa_flags = SA_SIGINFO,
  1535 + .sa_sigaction = change_virtblk_backend,
  1536 + };
  1537 + char buffer[PATH_MAX];
  1538 + int err;
  1539 +
  1540 + /*
  1541 + * Currently, one virtio block device is supported for each MIC card
  1542 + * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
  1543 + * The signal informs the virtio block backend about a change in the
  1544 + * configuration file which specifies the virtio backend file name on
  1545 + * the host. Virtio block backend then re-reads the configuration file
  1546 + * and switches to the new block device. This signalling mechanism may
  1547 + * not be required once multiple virtio block devices are supported by
  1548 + * the MIC daemon.
  1549 + */
  1550 + sigaction(SIGUSR1, &ignore, NULL);
  1551 +
  1552 + mic->pid = fork();
  1553 + switch (mic->pid) {
  1554 + case 0:
  1555 + set_log_buf_info(mic);
  1556 + set_cmdline(mic);
  1557 + add_virtio_device(mic, &virtcons_dev_page.dd);
  1558 + add_virtio_device(mic, &virtnet_dev_page.dd);
  1559 + err = pthread_create(&mic->mic_console.console_thread, NULL,
  1560 + virtio_console, mic);
  1561 + if (err)
  1562 + mpsslog("%s virtcons pthread_create failed %s\n",
  1563 + mic->name, strerror(err));
  1564 + err = pthread_create(&mic->mic_net.net_thread, NULL,
  1565 + virtio_net, mic);
  1566 + if (err)
  1567 + mpsslog("%s virtnet pthread_create failed %s\n",
  1568 + mic->name, strerror(err));
  1569 + err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
  1570 + virtio_block, mic);
  1571 + if (err)
  1572 + mpsslog("%s virtblk pthread_create failed %s\n",
  1573 + mic->name, strerror(err));
  1574 + sigemptyset(&act.sa_mask);
  1575 + err = sigaction(SIGUSR1, &act, NULL);
  1576 + if (err)
  1577 + mpsslog("%s sigaction SIGUSR1 failed %s\n",
  1578 + mic->name, strerror(errno));
  1579 + while (1)
  1580 + sleep(60);
  1581 + case -1:
  1582 + mpsslog("fork failed MIC name %s id %d errno %d\n",
  1583 + mic->name, mic->id, errno);
  1584 + break;
  1585 + default:
  1586 + if (mic->restart) {
  1587 + snprintf(buffer, PATH_MAX, "boot");
  1588 + setsysfs(mic->name, "state", buffer);
  1589 + mpsslog("%s restarting mic %d\n",
  1590 + mic->name, mic->restart);
  1591 + mic->restart = 0;
  1592 + }
  1593 + pthread_create(&mic->config_thread, NULL, mic_config, mic);
  1594 + }
  1595 +}
  1596 +
  1597 +static void
  1598 +start_daemon(void)
  1599 +{
  1600 + struct mic_info *mic;
  1601 +
  1602 + for (mic = mic_list.next; mic != NULL; mic = mic->next)
  1603 + init_mic(mic);
  1604 +
  1605 + while (1)
  1606 + sleep(60);
  1607 +}
  1608 +
  1609 +static int
  1610 +init_mic_list(void)
  1611 +{
  1612 + struct mic_info *mic = &mic_list;
  1613 + struct dirent *file;
  1614 + DIR *dp;
  1615 + int cnt = 0;
  1616 +
  1617 + dp = opendir(MICSYSFSDIR);
  1618 + if (!dp)
  1619 + return 0;
  1620 +
  1621 + while ((file = readdir(dp)) != NULL) {
  1622 + if (!strncmp(file->d_name, "mic", 3)) {
  1623 + mic->next = malloc(sizeof(struct mic_info));
  1624 + if (mic->next) {
  1625 + mic = mic->next;
  1626 + mic->next = NULL;
  1627 + memset(mic, 0, sizeof(struct mic_info));
  1628 + mic->id = atoi(&file->d_name[3]);
  1629 + mic->name = malloc(strlen(file->d_name) + 16);
  1630 + if (mic->name)
  1631 + strcpy(mic->name, file->d_name);
  1632 + mpsslog("MIC name %s id %d\n", mic->name,
  1633 + mic->id);
  1634 + cnt++;
  1635 + }
  1636 + }
  1637 + }
  1638 +
  1639 + closedir(dp);
  1640 + return cnt;
  1641 +}
  1642 +
  1643 +void
  1644 +mpsslog(char *format, ...)
  1645 +{
  1646 + va_list args;
  1647 + char buffer[4096];
  1648 + char ts[52], *ts1;
  1649 + time_t t;
  1650 +
  1651 + if (logfp == NULL)
  1652 + return;
  1653 +
  1654 + va_start(args, format);
  1655 + vsprintf(buffer, format, args);
  1656 + va_end(args);
  1657 +
  1658 + time(&t);
  1659 + ts1 = ctime_r(&t, ts);
  1660 + ts1[strlen(ts1) - 1] = '\0';
  1661 + fprintf(logfp, "%s: %s", ts1, buffer);
  1662 +
  1663 + fflush(logfp);
  1664 +}
  1665 +
  1666 +int
  1667 +main(int argc, char *argv[])
  1668 +{
  1669 + int cnt;
  1670 + pid_t pid;
  1671 +
  1672 + myname = argv[0];
  1673 +
  1674 + logfp = fopen(LOGFILE_NAME, "a+");
  1675 + if (!logfp) {
  1676 + fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
  1677 + exit(1);
  1678 + }
  1679 + pid = fork();
  1680 + switch (pid) {
  1681 + case 0:
  1682 + break;
  1683 + case -1:
  1684 + exit(2);
  1685 + default:
  1686 + exit(0);
  1687 + }
  1688 +
  1689 + mpsslog("MIC Daemon start\n");
  1690 +
  1691 + cnt = init_mic_list();
  1692 + if (cnt == 0) {
  1693 + mpsslog("MIC module not loaded\n");
  1694 + exit(3);
  1695 + }
  1696 + mpsslog("MIC found %d devices\n", cnt);
  1697 +
  1698 + start_daemon();
  1699 +
  1700 + exit(0);
  1701 +}
Documentation/mic/mpssd/mpssd.h
  1 +/*
  2 + * Intel MIC Platform Software Stack (MPSS)
  3 + *
  4 + * Copyright(c) 2013 Intel Corporation.
  5 + *
  6 + * This program is free software; you can redistribute it and/or modify
  7 + * it under the terms of the GNU General Public License, version 2, as
  8 + * published by the Free Software Foundation.
  9 + *
  10 + * This program is distributed in the hope that it will be useful, but
  11 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 + * General Public License for more details.
  14 + *
  15 + * The full GNU General Public License is included in this distribution in
  16 + * the file called "COPYING".
  17 + *
  18 + * Intel MIC User Space Tools.
  19 + */
  20 +#ifndef _MPSSD_H_
  21 +#define _MPSSD_H_
  22 +
  23 +#include <stdio.h>
  24 +#include <stdlib.h>
  25 +#include <string.h>
  26 +#include <fcntl.h>
  27 +#include <unistd.h>
  28 +#include <dirent.h>
  29 +#include <libgen.h>
  30 +#include <pthread.h>
  31 +#include <stdarg.h>
  32 +#include <time.h>
  33 +#include <errno.h>
  34 +#include <sys/dir.h>
  35 +#include <sys/ioctl.h>
  36 +#include <sys/poll.h>
  37 +#include <sys/types.h>
  38 +#include <sys/socket.h>
  39 +#include <sys/stat.h>
  40 +#include <sys/types.h>
  41 +#include <sys/mman.h>
  42 +#include <sys/utsname.h>
  43 +#include <sys/wait.h>
  44 +#include <netinet/in.h>
  45 +#include <arpa/inet.h>
  46 +#include <netdb.h>
  47 +#include <pthread.h>
  48 +#include <signal.h>
  49 +#include <limits.h>
  50 +#include <syslog.h>
  51 +#include <getopt.h>
  52 +#include <net/if.h>
  53 +#include <linux/if_tun.h>
  54 +#include <linux/if_tun.h>
  55 +#include <linux/virtio_ids.h>
  56 +
  /* Root of the MIC sysfs class directory; readsysfs()/setsysfs() build paths under it. */
  57 +#define MICSYSFSDIR "/sys/class/mic"
  /* Log file that main() opens in append mode; mpsslog() writes to it. */
  58 +#define LOGFILE_NAME "/var/log/mpssd"
  /* Size of the stack buffer readsysfs() reads a sysfs entry into. */
  59 +#define PAGE_SIZE 4096
  60 +
  /* Per-card state of the virtio console backend (embedded in struct mic_info). */
  61 +struct mic_console_info {
  62 + pthread_t console_thread; /* thread created by init_mic() to run virtio_console() */
  63 + int virtio_console_fd; /* NOTE(review): presumably the fd of the console virtio device; set outside this chunk - confirm */
  64 + void *console_dp; /* NOTE(review): presumably the console device page; set outside this chunk - confirm */
  65 +};
  66 +
  /* Per-card state of the virtio net backend (embedded in struct mic_info). */
  67 +struct mic_net_info {
  68 + pthread_t net_thread; /* thread created by init_mic() to run virtio_net() */
  69 + int virtio_net_fd; /* NOTE(review): presumably the fd of the net virtio device; set outside this chunk - confirm */
  70 + int tap_fd; /* NOTE(review): presumably the fd of the host tap interface; set outside this chunk - confirm */
  71 + void *net_dp; /* NOTE(review): presumably the net device page; set outside this chunk - confirm */
  72 +};
  73 +
  /* Per-card state of the virtio block backend (embedded in struct mic_info). */
  74 +struct mic_virtblk_info {
  75 + pthread_t block_thread; /* thread created by init_mic() to run virtio_block() */
  76 + int virtio_block_fd; /* NOTE(review): presumably the fd of the block virtio device; set outside this chunk - confirm */
  77 + void *block_dp; /* NOTE(review): presumably the block device page; set outside this chunk - confirm */
  78 + volatile sig_atomic_t signaled; /* NOTE(review): presumably set from the SIGUSR1 handler that init_mic() installs - confirm */
  79 + char *backend_file; /* NOTE(review): presumably the path of the file backing the block device - confirm */
  80 + int backend; /* NOTE(review): presumably the fd of the backing file - confirm */
  81 + void *backend_addr; /* NOTE(review): presumably the address the backing file is mapped at - confirm */
  82 + long backend_size; /* NOTE(review): presumably the size of the backing file in bytes - confirm */
  83 +};
  84 +
  /* One MIC card; nodes are chained in a singly linked list headed by mic_list. */
  85 +struct mic_info {
  86 + int id; /* number parsed by init_mic_list() from the sysfs name after "mic" */
  87 + char *name; /* heap copy of the sysfs directory name, made by init_mic_list() */
  88 + pthread_t config_thread; /* thread created by init_mic() to run mic_config() */
  89 + pid_t pid; /* init_mic() treats 0 as child, -1 as fork failure; NOTE(review): presumably the fork() result - confirm */
  90 + struct mic_console_info mic_console;
  91 + struct mic_net_info mic_net;
  92 + struct mic_virtblk_info mic_virtblk;
  93 + int restart; /* when nonzero, init_mic() writes "boot" to the card's "state" entry and clears it */
  94 + struct mic_info *next; /* next card in the list, NULL at the end */
  95 +};
  96 +
  /* Write a timestamped printf-style message to the daemon log; no-op while the log is not open. */
  97 +void mpsslog(char *format, ...);
  /*
   * Read MICSYSFSDIR[/dir]/entry (dir may be NULL). Returns a malloc()ed
   * string the caller must free, or NULL on failure or when the entry is empty.
   */
  98 +char *readsysfs(char *dir, char *entry);
  /*
   * Write value to MICSYSFSDIR[/dir]/entry unless it already holds that value.
   * Returns 0 on success or the errno of the failing open()/write().
   */
  99 +int setsysfs(char *dir, char *entry, char *value);
  100 +#endif
Documentation/mic/mpssd/sysfs.c
  1 +/*
  2 + * Intel MIC Platform Software Stack (MPSS)
  3 + *
  4 + * Copyright(c) 2013 Intel Corporation.
  5 + *
  6 + * This program is free software; you can redistribute it and/or modify
  7 + * it under the terms of the GNU General Public License, version 2, as
  8 + * published by the Free Software Foundation.
  9 + *
  10 + * This program is distributed in the hope that it will be useful, but
  11 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 + * General Public License for more details.
  14 + *
  15 + * The full GNU General Public License is included in this distribution in
  16 + * the file called "COPYING".
  17 + *
  18 + * Intel MIC User Space Tools.
  19 + */
  20 +
  21 +#include "mpssd.h"
  22 +
  /* Same value as the PAGE_SIZE definition in mpssd.h, which is included above. */
  23 +#define PAGE_SIZE 4096
  24 +
  25 +char *
  26 +readsysfs(char *dir, char *entry)
  27 +{
  28 + char filename[PATH_MAX];
  29 + char value[PAGE_SIZE];
  30 + char *string = NULL;
  31 + int fd;
  32 + int len;
  33 +
  34 + if (dir == NULL)
  35 + snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
  36 + else
  37 + snprintf(filename, PATH_MAX,
  38 + "%s/%s/%s", MICSYSFSDIR, dir, entry);
  39 +
  40 + fd = open(filename, O_RDONLY);
  41 + if (fd < 0) {
  42 + mpsslog("Failed to open sysfs entry '%s': %s\n",
  43 + filename, strerror(errno));
  44 + return NULL;
  45 + }
  46 +
  47 + len = read(fd, value, sizeof(value));
  48 + if (len < 0) {
  49 + mpsslog("Failed to read sysfs entry '%s': %s\n",
  50 + filename, strerror(errno));
  51 + goto readsys_ret;
  52 + }
  53 + if (len == 0)
  54 + goto readsys_ret;
  55 +
  56 + value[len - 1] = '\0';
  57 +
  58 + string = malloc(strlen(value) + 1);
  59 + if (string)
  60 + strcpy(string, value);
  61 +
  62 +readsys_ret:
  63 + close(fd);
  64 + return string;
  65 +}
  66 +
  67 +int
  68 +setsysfs(char *dir, char *entry, char *value)
  69 +{
  70 + char filename[PATH_MAX];
  71 + char *oldvalue;
  72 + int fd, ret = 0;
  73 +
  74 + if (dir == NULL)
  75 + snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
  76 + else
  77 + snprintf(filename, PATH_MAX, "%s/%s/%s",
  78 + MICSYSFSDIR, dir, entry);
  79 +
  80 + oldvalue = readsysfs(dir, entry);
  81 +
  82 + fd = open(filename, O_RDWR);
  83 + if (fd < 0) {
  84 + ret = errno;
  85 + mpsslog("Failed to open sysfs entry '%s': %s\n",
  86 + filename, strerror(errno));
  87 + goto done;
  88 + }
  89 +
  90 + if (!oldvalue || strcmp(value, oldvalue)) {
  91 + if (write(fd, value, strlen(value)) < 0) {
  92 + ret = errno;
  93 + mpsslog("Failed to write new sysfs entry '%s': %s\n",
  94 + filename, strerror(errno));
  95 + }
  96 + }
  97 + close(fd);
  98 +done:
  99 + if (oldvalue)
  100 + free(oldvalue);
  101 + return ret;
  102 +}