{
  "metadata" : {
    "id" : "2f852a9a-018b-4dd9-8c45-f87021dad78a",
    "name" : "class_clean.snb.ipynb",
    "user_save_timestamp" : "2018-05-22T15:01:51.417Z",
    "auto_save_timestamp" : "1970-01-01T01:00:00.000Z",
    "language_info" : {
      "name" : "scala",
      "file_extension" : "scala",
      "codemirror_mode" : "text/x-scala"
    },
    "trusted" : true,
    "sparkNotebook" : null,
    "customLocalRepo" : null,
    "customRepos" : null,
    "customDeps" : null,
    "customImports" : null,
    "customArgs" : null,
    "customSparkConf" : null,
    "customVars" : null
  },
  "cells" : [ {
    "metadata" : {
      "trusted" : true,
      "input_collapsed" : false,
      "collapsed" : false,
      "id" : "84A72D1FD7EB49888C7DF4242EB01AED"
    },
    "cell_type" : "code",
    "source" : [ "import org.apache.spark.SparkContext\n", "\n", "val spark = SparkSession \n", ".builder \n", ".appName (\"WorldBankIndex\") \n", ".getOrCreate ()\n" ],
    "outputs" : [ {
      "name" : "stdout",
      "output_type" : "stream",
      "text" : "import org.apache.spark.SparkContext\nspark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@5b49a2ec\n"
    }, {
      "metadata" : { },
      "data" : {
        "text/html" : ""
      },
      "output_type" : "execute_result",
      "execution_count" : 3,
      "time" : "Took: 1.797s, at 2018-05-30 19:33"
    } ]
  }, {
    "metadata" : {
      "trusted" : true,
      "input_collapsed" : false,
      "collapsed" : false,
      "id" : "BBE39DAF284C4BAD809FCD5DA69084A1"
    },
    "cell_type" : "code",
    "source" : [ "import org.apache.spark.ml.feature.PCA\n", "import org.apache.spark.mllib.linalg.Matrix\n", "import org.apache.spark.mllib.linalg.Vectors\n", "import org.apache.spark.SparkContext\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import scala.util.control.Breaks._\n", "\n", "\n", "\n", "import math._\n", "import Array._\n", "\n", "class GTM\n", "{\n", " val tol=1e-3\n", " val verbose = false\n", " val max_iter=10\n", " val alpha=1e-3\n", " val sigma = 1\n", " val method = \"mean\"  \n", "  \n", "  def distance(xs: Array[Double], ys: Array[Double]) = {\n", "    sqrt((xs zip ys).map { case (x,y) => pow(y - x, 2) }.sum)\n", "    }  \n", "\n", " def cdist(xs: Double, ys: Double) = {\n", "    sqrt(( pow(ys - xs, 2) ))\n", "    }  \n", "\n", "  def get_lattice_points(n_grid:Int): Array[Array[Double]] ={\n", "    val r = scala.util.Random\n", "    val mat=  Array.range(0,n_grid).map(i => Array.range(0,3).map(j =>r.nextDouble() ))\n", "    return mat.transpose  \n", "    }  \n", "\n", "//def init()\n", "\n", "//generate map  \n", " val z = get_lattice_points(3)\n", " val rbfs = get_lattice_points(2)\n", " val dd = z.flatMap(x =>rbfs.map(y => {distance(x,y)}))\n", " val phi =  dd.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " val r = scala.util.Random\n", "\n", "//init W and beta from PCA\n", " val data = Array(\n", "  Vectors.dense(Array.range(0,3).map(i => r.nextDouble())),\n", "  Vectors.dense(Array.range(0,3).map(i => r.nextDouble())),\n", "  Vectors.dense(Array.range(0,3).map(i => r.nextDouble())) )\n", "\n", " val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF(\"features\")\n", "\n", " val pca = new PCA()\n", "  .setInputCol(\"features\")\n", "  .setOutputCol(\"pcaFeatures\")\n", "  .setK(3)\n", "  .fit(df)\n", "\n", "\n", " import breeze.linalg._\n", "\n", " val pcc = pca.pc\n", " val z2 = DenseMatrix(z.map(_.toArray):_*) \n", " val phi2 = 
DenseMatrix(phi.map(_.toDouble):_*)\n", " val phinv = pinv(phi2)\n", " val w = z2.toArray * phinv.toArray * pcc.toArray.patch(9, Nil, 1)\n", "\n", " val betainv1   = pca.explainedVariance(2)\n", " val dot1 =phi * w\n", " val dot2 =phi * w\n", " val inter_dist1 =dot1.map(x=>dot2.map(y=> cdist(x,y)))\n", " val inter_dist = DenseMatrix(inter_dist1.map(_.toArray):_*)     \n", "//diagonal\n", " List.range(0,inter_dist.rows).flatMap(i =>List.range(0,inter_dist.cols)\n", "                             .map(j=>  if (j==i)  \n", "                              inter_dist(i,j) = Double.PositiveInfinity))\n", "  \n", " val betainv2 = inter_dist.toArray.reduceLeft(_ min _)/2\n", " val beta = 1 / max(betainv1, betainv2)  \n", "\n", " val p1 = exp(-beta/2) \n", " val temp1 = DenseMatrix(rbfs.map(_.toArray):_*) \n", " val temp2 = temp1.toArray\n", " val p2 = dot1.map(x =>temp2.map(y => {cdist(x,y)}))\n", "\n", " val temp3 = DenseMatrix(p2.map(_.toArray):_*) \n", " \n", " val temp5 = DenseMatrix(z.map(_.toArray):_*) \n", "\n", "\n", " def responsability():breeze.linalg.DenseMatrix[Double]=\n", "  { \n", "  val pp2 = temp3.toArray\n", "  val p = pp2.map(x => x * p1)\n", "\n", "  val p3 = DenseMatrix(p:_*) \n", "  val somme = sum(p3 ,Axis._0 )\n", "  \n", "  val re = p3.map( j =>  j /:/ somme)\n", "  val re1 = re.toArray  \n", "  val t3 = DenseMatrix(re1.map(_.toArray):_*) \n", "  return t3.reshape(27,2)\n", "  \n", " }\n", "\n", "def likelihood () :Double=\n", "  {\n", "   val R = responsability() \n", "   val D = temp1.cols\n", "   val k1 = (D /2) * log(beta / 2* Pi)  \n", "   val k2 = temp3.map(i => i * (-beta /2)) \n", "  return sum(R.reshape(6,9) * (k2 +k1))\n", "  }\n", "  \n", "def fit()  \n", "  {\n", "\n", "\n", "range(1 ,max_iter).foreach(i =>\n", "{  \n", "  val R = responsability()  \n", "  val G = diag(sum(R , Axis._1 ))\n", "\n", "  val A1 = G.reshape(81, 9) * phi2 * phi2.t\n", "\n", "  val a1 = DenseMatrix.eye[Double](27)\n", "  val ze = alpha /beta\n", "  val a2 
=a1.map( i => i * ze) \n", "  val A2 = A1.reshape(27,27) + a2\n", "\n", "  val temp4 =phi2.t.reshape(3,3)\n", "  val A3 = temp4 * temp5 * R.reshape(3, 18)\n", "  val W = A2.reshape(3,243) \\ A3        //Solve (A2, A3)\n", "  val Beta = temp1.toArray.length / sum(temp3 * R.reshape(6,9))\n", "  \n", "  \n", "  val likelihood1 = likelihood()\n", "  var prev_likelihood_ = Double.NegativeInfinity\n", "  val diff = abs(likelihood1 - prev_likelihood_) / temp1.rows\n", "  prev_likelihood_ = likelihood1\n", " \n", "  if (verbose)\n", "    print(i+1, likelihood , diff)\n", "   \n", "  if (diff < tol)\n", "    if (verbose)\n", "      print(\"converged\")\n", "    break\n", "  })\n", " }\n", "  \n", "def transform() :DenseMatrix[Double]=\n", "  {\n", "  assert (method == \"mean\")  \n", "  val  R = responsability()\n", "    \n", "    \n", "    val a3= method.filter(e => e==\"mean\" ) \n", "      (temp5.t * R.reshape(3, 18)).t \n", "       \n", "    val a2= method.filter(e => e==\"mode\" ) \n", "      temp1(argmax(argmax(responsability(), Axis._0)),::).t.toDenseMatrix \n", "}\n", "     \n", "  \n", "def inverse_transform(): Array[Double]=\n", "  {\n", "    val d = z.transpose.flatMap(i => rbfs.map(j=> distance(i, j)))\n", "    val phi =  d.map({case (t:Double) => (exp(-t)/2*sigma) })\n", "    return  w * phi\n", "  }\n", "  \n", "  \n", "  \n", "  \n", "  \n", "  \n", "  \n", "  \n", "  \n", "  \n", "}" ],
    "outputs" : [ {
      "name" : "stdout",
      "output_type" : "stream",
      "text" : "import org.apache.spark.ml.feature.PCA\nimport org.apache.spark.mllib.linalg.Matrix\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.SparkContext\nimport org.apache.spark.mllib.linalg.distributed.RowMatrix\nimport org.apache.spark.mllib.linalg.{Matrix, Matrices}\nimport scala.util.control.Breaks._\nimport math._\nimport Array._\ndefined class GTM\n"
    }, {
      "metadata" : { },
      "data" : {
        "text/html" : ""
      },
      "output_type" : "execute_result",
      "execution_count" : 10,
      "time" : "Took: 4.334s, at 2018-05-30 19:40"
    } ]
  }, {
    "metadata" : {
      "trusted" : true,
      "input_collapsed" : false,
      "collapsed" : true,
      "id" : "9451B261D2494585AE2ACA652A554E1C"
    },
    "cell_type" : "code",
    "source" : [ "" ],
    "outputs" : [ ]
  } ],
  "nbformat" : 4
}