# Runs the tracer on the input trace JSON, using the required configuration.

# Ensure we end all background tasks upon exit.
# The double quotes expand $$ once, when the trap is installed. On SIGINT,
# SIGTERM or any exit, the handler first restores the default SIGTERM
# disposition (so it is not re-entered by its own signal) and then signals
# the process group whose ID equals this shell's PID.
trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT

# Prepares paths
# WARCPATH is the directory that is polled for finished *.warc files;
# CONFPATH is a private copy of the crawler configuration that the script
# edits in place. Expansions are quoted so a HOME containing whitespace
# does not split into several arguments.
export WARCPATH="${HOME}/trace-archiver/warcs/warcstore"
mkdir -p /tmp/workers-artifacts
mkdir -p /tmp/storm-logs
mkdir -p "${WARCPATH}"
cp -r /usr/share/tracer/certs "${HOME}/trace-archiver/."
export CONFPATH="${HOME}/trace-archiver/crawler-conf.yaml"
cp /usr/share/tracer/crawler-conf.yaml "${CONFPATH}"

# Install the database
# Runs mariadb-install-db against the data directory, passing the current
# unix user as --user.
echo "Creating MySQL Database:"
echo "------------------------"
db_user=$(whoami)
echo "unix user: ${db_user}"
# Make sure the data directory (and its parents) exist before the installer
# is pointed at it.
mkdir -p /home/occam/db/mysql
mariadb-install-db --user="${db_user}" --basedir=/usr --datadir=/home/occam/db/mysql
echo "Done."
echo ""

# We need a running MySQL database created with the portals database/tables
# MYSQL_PORT is exported here; later plain assignments keep the export flag.
export MYSQL_PORT=3306
echo "Running MySQL:"
echo "--------------"
mkdir -p /home/occam/db/mysql

# Candidate ports are 3306..3405; reaching the limit means none was usable.
mysql_port_limit=3406

while [ "${MYSQL_PORT}" -lt "${mysql_port_limit}" ]; do
  # Skip any port that already has a listener.
  if nc -z localhost "${MYSQL_PORT}"; then
    MYSQL_PORT=$((MYSQL_PORT+1))
    continue
  fi

  mariadbd --port="${MYSQL_PORT}" --datadir=/home/occam/db/mysql --skip-grant-tables > /dev/null 2> mariadb.error.log &
  export MYSQLPID=$!

  # The server counts as up once the TCP port answers (40s budget) ...
  if timeout 40 sh -c 'until nc -z $0 $1; do sleep 1; done' localhost "${MYSQL_PORT}"; then
    # ... and the sock file has appeared (10s budget).
    if timeout 10 sh -c "until ls /home/occam/db/mysql/mysql.sock > /dev/null 2> /dev/null; do sleep 1; done"; then
      break
    fi
  fi

  # The attempt failed. Stop the daemon we started so it is not left running
  # next to the retry, and move on to the next port so the search is bounded
  # by the port limit instead of retrying the same port forever.
  kill "${MYSQLPID}" 2> /dev/null
  wait "${MYSQLPID}" 2> /dev/null
  MYSQL_PORT=$((MYSQL_PORT+1))
done

if [ "${MYSQL_PORT}" -ge "${mysql_port_limit}" ]; then
  echo "Could not allocate a port for MySQL."
  exit 1
fi

# Set port in configuration
# Rewrites the first "3306" on each line of the copied configuration.
echo "port: ${MYSQL_PORT}"
sed "s;3306;${MYSQL_PORT};" -i "${CONFPATH}"

echo "Done."
echo ""

printf '%s\n' "Creating Database Tables:" "-------------------------"

# Stage the portals schema script in the working directory, then pipe it
# into the client as root.
cp /usr/share/tracer/mysql/tablesetup.sql .
cat tablesetup.sql | mysql -u root

# Add the 'tracer' account and give it full rights on the portals schema.
# FLUSH PRIVILEGES is issued both before and after the account statements.
tracer_user_sql="FLUSH PRIVILEGES; CREATE USER 'tracer'@'localhost' IDENTIFIED BY 'password'; GRANT ALL PRIVILEGES ON portals.* TO 'tracer'@'localhost'; FLUSH PRIVILEGES;"
printf '%s\n' "${tracer_user_sql}" | mysql -u root

printf '%s\n' "Done." ""

# Start a virtual framebuffer X server so a display is available.
printf '%s\n' "Running X Display:" "------------------"

DISPLAY=:0
export DISPLAY
Xvfb ${DISPLAY} > /dev/null 2>&1 &
XVFBPID=$!
export XVFBPID

# Poll once per second, for at most 60 seconds, until xdpyinfo can query
# display :0. Only stderr is discarded; xdpyinfo's report on success is
# still printed.
timeout 60 sh -c 'until xdpyinfo -display :0 2> /dev/null; do sleep 1; done'
printf '%s\n' "Done." ""

echo "Running Selenium:"
echo "-----------------"

# SELENIUM_PORT is exported here; later plain assignments keep the export flag.
export SELENIUM_PORT=4444
SE_OPTS="-browserTimeout 1000 -timeout 1000"

# Candidate ports are 4444..4543; reaching the limit means none was usable.
selenium_port_limit=4544

while [ "${SELENIUM_PORT}" -lt "${selenium_port_limit}" ]; do
  # Skip any port that already has a listener.
  if nc -z localhost "${SELENIUM_PORT}"; then
    SELENIUM_PORT=$((SELENIUM_PORT+1))
    continue
  fi

  # Then we run Selenium to create a browser source
  # SE_OPTS is left unquoted on purpose: it holds several separate options.
  # shellcheck disable=SC2086
  java -jar /usr/share/selenium-server/selenium-server-standalone-3.141.59.jar -port "${SELENIUM_PORT}" ${SE_OPTS} > /dev/null 2> /dev/null &
  export SELENIUMPID=$!

  # Wait (up to 60s) for the port to start answering.
  if timeout 60 sh -c 'until nc -z $0 $1; do sleep 1; done' localhost "${SELENIUM_PORT}"; then
    break
  fi

  # The attempt failed. Stop the JVM we started so it is not left running
  # next to the retry, and move on to the next port so the search is bounded
  # by the port limit instead of retrying the same port forever.
  kill "${SELENIUMPID}" 2> /dev/null
  wait "${SELENIUMPID}" 2> /dev/null
  SELENIUM_PORT=$((SELENIUM_PORT+1))
done

if [ "${SELENIUM_PORT}" -ge "${selenium_port_limit}" ]; then
  echo "Could not allocate a port for Selenium."
  exit 1
fi

# Set port in configuration
# Rewrites the first "4444" on each line of the copied configuration.
echo "port: ${SELENIUM_PORT}"
sed "s;4444;${SELENIUM_PORT};" -i "${CONFPATH}"

echo "Done."
echo ""

echo "Reading files:"
echo "--------------"

# Use default trace
# If no JSON file is present in ../inputs/0/0, install the bundled trace
# together with an object.json describing it.
if ls ../inputs/0/0/*.json > /dev/null 2> /dev/null; then
  echo "Using tracer trace file found in ../inputs/0/0"
else
  echo "Using default tracer trace"
  mkdir -p ../inputs/0/0
  cp /usr/share/tracer/wilkie.how.json ../inputs/0/0/trace.json
  # Single-quoted so the JSON is written literally; the stray second comma
  # after "subtype" that made the document unparseable has been removed.
  printf '%s\n' '{"name": "wilkie.how.json", "type": "application", "subtype": "application/json", "file": "trace.json"}' > ../inputs/0/0/object.json
fi

# Use default configuration
# If no JSON file is present in ../inputs/1/0, write a minimal config.json
# that only sets the URL.
if ls ../inputs/1/0/*.json > /dev/null 2> /dev/null; then
  echo "Using configuration found in ../inputs/1/0"
else
  echo "Using default tracer configuration"
  mkdir -p ../inputs/1/0
  printf '%s\n' '{"url": "https://wilkie.how"}' > ../inputs/1/0/config.json
fi
echo ""

# Pull the crawl settings out of ../inputs/1/0/config.json. Each value is
# whatever Python prints for dict.get(key), so a missing key yields "None".
URL=$(/usr/bin/cat ../inputs/1/0/config.json | python -c 'import json, sys; print(json.load(sys.stdin).get("url"))')
export URL
DEDUP=$(/usr/bin/cat ../inputs/1/0/config.json | python -c 'import json, sys; print(json.load(sys.stdin).get("dedup"))')
export DEDUP
NAME=

# When the dedup value contains "True", uncomment the warcprox.dedup.path
# entry in the crawler configuration.
case "${DEDUP}" in
  *True*)
    sed "s;#warcprox.dedup.path;warcprox.dedup.path;" -i ${CONFPATH}
    ;;
esac

# First the seed injector
# Rebuild ./seeds/seeds.txt from scratch with one tab-separated line per
# trace file found in ../inputs/0/0 (object.json is metadata, not a trace).
mkdir -p ./seeds
rm -f ./seeds/seeds.txt
touch ./seeds/seeds.txt
SEEDPATH=
for file in ../inputs/0/0/*.json; do
  # An unmatched glob stays literal; skip it instead of emitting a bogus seed.
  [ -e "${file}" ] || continue

  SEEDFILENAME=$(basename "${file}")
  if [ "${SEEDFILENAME}" != "object.json" ]; then
    SEEDPATH=$(realpath "${file}")
    NAME=$(/usr/bin/cat "${SEEDPATH}" | python -c 'import json, sys; print(json.load(sys.stdin).get("traceName"))')
    export NAME
    # URL and path are passed as arguments, never as part of the format
    # string, so '%' or '\' inside a URL is written out verbatim.
    printf '%s\ttrace=File://%s\tfilter=dynamic.json\n' "${URL}" "${SEEDPATH}" >> ./seeds/seeds.txt
  fi
done
echo "Seeds file:"
echo "-----------"
cat ./seeds/seeds.txt
echo ""

# SEEDPATH and NAME hold the values of the last trace file processed.
echo "Trace file:"
echo "-----------"
echo "path: ${SEEDPATH}"
echo "name: ${NAME}"
echo ""

echo "Configuration:"
echo "--------------"
echo "url: ${URL}"
echo ""

printf '%s\n' "Running Seed Injector:" "----------------------"

# Launch the SeedInjector class from the capture jar in the background,
# pointing it at ./seeds/seeds.txt and the edited configuration. All of its
# output is discarded; its PID is exported as SEEDINJECTORPID.
storm jar /usr/share/tracer/target/stormcapture-0.2.jar \
  gov.lanl.crawler.SeedInjector ./seeds seeds.txt \
  -conf "${CONFPATH}" \
  -local \
  -c storm.workers.artifacts.dir=/tmp/workers-artifacts \
  > /dev/null 2>&1 &
SEEDINJECTORPID=$!
export SEEDINJECTORPID

printf '%s\n' "Started." ""

echo "Determining Proxy Port:"
echo "-----------------------"

# Start from 8050 shifted by the same offset MySQL ended up with, then walk
# upwards past any port that already has a listener. 8150 is the stop value.
export WARCPROX_PORT=8050
MYSQL_PORT_DIFF=$((MYSQL_PORT-3306))
export WARCPROX_PORT=$((WARCPROX_PORT+MYSQL_PORT_DIFF))
while nc -z localhost "${WARCPROX_PORT}"; do
  export WARCPROX_PORT=$((WARCPROX_PORT+1))

  if [ "${WARCPROX_PORT}" -ge 8150 ]; then
    break
  fi
done

# Running out of candidate ports is fatal. This used to be a bare 'break'
# outside of any loop, which only printed a shell error and let the script
# carry on with an unusable port.
if [ "${WARCPROX_PORT}" -ge 8150 ]; then
  echo "Could not allocate a port for the proxy."
  exit 1
fi

# Set port in configuration
# Rewrites the first "8050" on each line of the copied configuration.
echo "port: ${WARCPROX_PORT}"
sed "s;8050;${WARCPROX_PORT};" -i "${CONFPATH}"

echo "Done."
echo ""

printf '%s\n' "Running Crawler:" "----------------"

# Launch the CrawlTopology class from the capture jar in the background with
# the edited configuration. Its output is not redirected.
storm jar /usr/share/tracer/target/stormcapture-0.2.jar \
  gov.lanl.crawler.CrawlTopology \
  -conf "${CONFPATH}" \
  -local \
  -c storm.workers.artifacts.dir=/tmp/workers-artifacts &
# Disabled alternative that filters the crawler output down to traced URLs:
#sh -c "storm jar /usr/share/tracer/target/stormcapture-0.2.jar gov.lanl.crawler.CrawlTopology -conf \"${CONFPATH}\" -local -c storm.workers.artifacts.dir=/tmp/workers-artifacts | grep \"TRACE WarcWriterProcessor\" | grep \"url=\" | grep -Po \"url=b'\K[^']*\" | sed \"s;^;Tracing: ;\"" &
CRAWLERPID=$!
export CRAWLERPID

# Block until at least one *.warc file shows up in WARCPATH, checking once
# per second and giving up after 3600 seconds. The outcome is not inspected
# here.
timeout 3600 sh -c "until ls ${WARCPATH}/*.warc > /dev/null 2> /dev/null; do sleep 1; done"

printf '%s\n' "Done." ""

# Tear-down: each background helper is sent SIGKILL by its recorded PID,
# in the order seed injector, crawler, X server, Selenium, MySQL.
printf '%s\n' "Stopping Seed Injector:" "-----------------------"
kill -9 ${SEEDINJECTORPID}
printf '%s\n' "Done." ""

printf '%s\n' "Stopping Crawler:" "-----------------"
kill -9 ${CRAWLERPID}
printf '%s\n' "Done." ""

printf '%s\n' "Stopping X Server:" "------------------"
kill -9 ${XVFBPID}
printf '%s\n' "Done." ""

printf '%s\n' "Stopping Selenium:" "------------------"
kill -9 ${SELENIUMPID}
printf '%s\n' "Done." ""

# Sweep up any remaining process named "java" that the PIDs above missed.
pkill -9 java

printf '%s\n' "Stopping MySQL:" "---------------"
kill -9 ${MYSQLPID}
printf '%s\n' "Done." ""

echo "WARC Output:"
echo "------------"

# Copy every produced WARC into its own ../outputs/0/<index> directory and
# write an object.json next to it describing the archive.
i=0
for file in "${WARCPATH}"/*.warc; do
  # An unmatched glob stays literal; skip it so the count stays at zero.
  [ -e "${file}" ] || continue

  echo "file: ${file}"
  filename=$(basename "${file}")
  mkdir -p "../outputs/0/${i}"
  cp "${file}" "../outputs/0/${i}/."
  # NOTE(review): URL and filename are embedded without JSON escaping.
  printf '%s\n' "{\"name\": \"${URL} archive\", \"type\": \"website\", \"subtype\": \"application/warc\", \"file\": \"${filename}\", \"metadata\": {\"general\": {\"url\": \"${URL}\"}}}" > "../outputs/0/${i}/object.json"
  i=$((i+1))
done

# No WARC at all means the trace produced nothing: report and fail.
if [ "${i}" -eq 0 ]; then
  echo "No output found."
  # Fail

  echo ""
  echo "Trace Failed."
  echo ""

  exit 1
fi

echo ""
echo "Trace Completed Successfully."
echo ""
