Commit 56a4cffa authored by AREZKI HAFID
parents 13fbcdd1 ccbb2510
{
"metadata" : {
"id" : "bc2cd7fd-9fc8-431b-8a04-ec15af824c67",
"name" : "final_lipn",
"user_save_timestamp" : "1970-01-01T01:00:00.000Z",
"auto_save_timestamp" : "1970-01-01T01:00:00.000Z",
"language_info" : {
"name" : "scala",
"file_extension" : "scala",
"codemirror_mode" : "text/x-scala"
},
"trusted" : true,
"sparkNotebook" : null,
"customLocalRepo" : null,
"customRepos" : null,
"customDeps" : null,
"customImports" : null,
"customArgs" : null,
"customSparkConf" : null,
"customVars" : null
},
"cells" : [ {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "E6FF57A50C0E45758ABE2D68933A2DAB"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.SparkContext\n", "\n", "val spark = SparkSession \n", ".builder \n", ".appName (\"WorldBankIndex\") \n", ".getOrCreate ()" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.SparkContext\nspark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@18ee7055\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 1,
"time" : "Took: 1.933s, at 2018-06-22 00:23"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "38E75D0E9A91489C83BBE430B8F579EF"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.ml.linalg.Vectors\n", "val r = scala.util.Random\n", "import Array._\n", "\n", "val X = (\n", " range(0,5).map(i => Vectors.dense(Array.range(0,2).map(i => r.nextDouble()))))\n", " val df = spark.createDataFrame(X.map(Tuple1.apply)).toDF(\"features\")" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.ml.linalg.Vectors\nr: util.Random.type = scala.util.Random$@298e22d5\nimport Array._\nX: Array[org.apache.spark.ml.linalg.Vector] = Array([0.3992103139732748,0.15931392136077793], [0.1681336055048711,0.232587262081421], [0.5789260963806718,0.5978202204947635], [0.6468902068002914,0.8231631054805284], [0.36276317810451,0.14663951093921612])\ndf: org.apache.spark.sql.DataFrame = [features: vector]\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 2,
"time" : "Took: 6.117s, at 2018-06-22 00:23"
} ]
}, {
"metadata" : {
"id" : "84A20896670C4C5B8AE4400861271E55"
},
"cell_type" : "markdown",
"source" : "***responsability***\n\n$p(\\textbf{t|x}, \\textbf{W},\\beta) = (\\frac{\\beta}{2\\pi})^\\frac{D}{2} \\exp \\left\\{ \n \\frac{- \\beta}{2}\n\\mid y(x; \\textbf{W})-t \\mid ^2 \\right\\} $"
}, {
"metadata" : {
"id" : "E3F138364D0D42488CF4B0BA8D30A8C3"
},
"cell_type" : "markdown",
"source" : "***Likelihood***\n\n$\\mathfrak L (\\textbf{W}, \\beta)= \\ln \\prod\\limits_{\\substack{n=1}}^{N} p(t_n|W, \\beta) $"
}, {
"metadata" : {
"id" : "7C190FB0249F468395BD83082782801C"
},
"cell_type" : "markdown",
"source" : "***transform***\n\n$ (x| t_n, W^* , \\beta ^*) = \\int p(x|t_n, W^*,\\beta ^*)x dx $\n\n$= \\sum_{i=1}^{K} R_{in} x_i$"
}, {
"metadata" : {
"id" : "ACCC0375B1034ED580389E0BD14B5040"
},
"cell_type" : "markdown",
"source" : "***inverse_transform***\n\n$ E = 1/2 \\sum_{i} \\left \\| W \\varnothing (x_{i}) - Ux_I\\right \\| $"
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "64B8AB887DA040A4BA813B8C9C115908"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.mllib.linalg.Matrix\n", "import org.apache.spark.SparkContext\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import scala.util.control.Breaks._\n", "import breeze.linalg._\n", "import org.apache.spark.ml.feature.PCA\n", "import math._\n", "\n", "class GTM (X: Array[org.apache.spark.ml.linalg.Vector],\n", " n_components :Int, max_iter:Int, tol:Double, verbose:Boolean)\n", "{\n", " //val tol=1e-3\n", " //val verbose = false\n", " //val max_iter=10\n", " //val n_components = 2 \n", " var alpha=1e-3\n", " var sigma = 1\n", " var method = \"mean\" \n", " var n_grids = 20 \n", " var n_rbfs = 10 \n", " \n", " def cdist(xs: Double, ys: Double) = {\n", " sqrt(( pow(ys - xs, 2) ))\n", " }\n", "\n", " def get_lattice_points(n_grid:Int): DenseMatrix[Double] ={\n", " var a = range(0,n_components).map(e =>DenseMatrix( linspace(-1, 1, n_grid))).toArray\n", " return DenseMatrix(a.map(_.toArray):_*).t\n", " }\n", " def distance (a :DenseMatrix[Double], b :DenseMatrix[Double]) :DenseMatrix[Double]={\n", " var e0 = a.toArray.map(e => b.toArray.map(i => cdist(e,i)))\n", " var e1 =DenseMatrix(e0.map(_.toArray):_*).toArray\n", " var e2 = e1.take(a.rows * b.rows)\n", " var e3 = DenseMatrix(e2.map(_.toDouble):_*)\n", " return e3.reshape(a.rows , b.rows)\n", " }\n", " var z =get_lattice_points(n_grids)\n", " var rbfs=get_lattice_points(n_rbfs)\n", " var d = distance(z, rbfs)\n", " val phi = d.map({case (t:Double) => (exp(-t)/(2*sigma)) })\n", "\n", " var pca = new PCA()\n", " .setInputCol(\"features\")\n", " .setOutputCol(\"pcaFeatures\")\n", " .setK(2)\n", " .fit(df)\n", " val pc = pca.pc\n", " val pcc = DenseMatrix(pc.toArray.map(_.toDouble):_*)\n", " \n", " var w =pinv(phi) * z * pcc.reshape(z.cols, pcc.rows/z.cols)\n", " val betainv1 = pca.explainedVariance(1)\n", " val w_phi = (phi * w).toArray \n", " val inter_dist1 =(phi * w).toArray.map(x=>(phi * w).toArray.map(y=> cdist(x,y)))\n", " val inter_dist = DenseMatrix(inter_dist1.map(_.toArray):_*) \n", " List.range(0,inter_dist.rows).flatMap(i =>List.range(0,inter_dist.cols)\n", " .map(j=> if (j==i) \n", " inter_dist(i,j) = Double.PositiveInfinity))\n", " \n", " \n", " import breeze.linalg._ \n", " var betainv2 = inter_dist.toArray.reduceLeft(_ min _)/2\n", " var beta = 1/max(betainv1, betainv2)\n", " \n", " def responsability(X: Array[org.apache.spark.ml.linalg.Vector]):Array[Double]={\n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var p2=(w_phi zip x).map{case (x,y) => cdist(x,y)}.map(e => e * exp(-beta/2) )\n", " var p3= p2.map(e => e /:/ sum(DenseMatrix(p2:_*) , Axis._0))\n", " return DenseMatrix(p3.map(_.toArray):_*).toArray\n", " }\n", " \n", " def likelihood (X: Array[org.apache.spark.ml.linalg.Vector]) :Double=\n", " {\n", " var R = responsability(X) \n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var xd = DenseMatrix(X.map(_.toArray):_*)\n", " var D = xd.cols\n", " var k1 = (D /2) * log(beta / 2* Pi) \n", " var k = (w_phi zip x).map{case (a, b) => cdist(a,b)}.map(e => e * (-beta/2) )\n", " var k2 = DenseMatrix(k.map(_.toDouble):_*) \n", " return sum(DenseMatrix(R.map(_.toDouble):_*) *:* (k2 + k1))\n", " }\n", " \n", " def fit (X: Array[org.apache.spark.ml.linalg.Vector])=\n", " {\n", " range(1 ,max_iter).map(i =>\n", " {\n", " var R = responsability(X) \n", " var xx = DenseMatrix(R.map(_.toDouble):_*) \n", " var G = diag(sum(xx , Axis._1 ))\n", " var XX = DenseMatrix(X.map(_.toArray):_*)\n", "\n", " val 
w = (phi * G * phi.t + DenseMatrix.eye[Double](phi.rows).map( i => i * alpha /beta)) \\ \n", " (phi *xx * XX.reshape(xx.cols, xx.rows))\n", " \n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " val beta1 = X.length / sum((w_phi zip x).map{case (a, b) => cdist(a,b)} * R)\n", " \n", " val likelihood1 = likelihood(X)\n", " var prev_likelihood_ = Double.NegativeInfinity\n", " \n", " val diff = abs(likelihood1 - prev_likelihood_) / XX.rows\n", " prev_likelihood_ = likelihood1\n", "\n", " if (verbose)\n", " print(i+1 ,likelihood1 , diff)\n", " \n", " if (diff < tol)\n", " if (verbose)\n", " print(\"converged\")\n", " break\n", " })\n", " }\n", " \n", " \n", " def transform(X: Array[org.apache.spark.ml.linalg.Vector]) :DenseMatrix[Double]=\n", " {\n", " var R = responsability(X)\n", " var xx = DenseMatrix(R.map(_.toDouble):_*) \n", " \n", "\n", " method.filter(e => e ==\"mean\" ) \n", " return z * xx.t.reshape(z.cols ,xx.rows/z.cols )\n", "\n", " method.filter(e => e ==\"mode\" ) \n", " return z(argmax(responsability(X)), ::).t.toDenseMatrix\n", " }\n", " \n", " def inverse_transform(X: Array[org.apache.spark.ml.linalg.Vector]): DenseMatrix[Double]=\n", " {\n", " var XX = DenseMatrix(X.map(_.toArray):_*)\n", " var dd = rbfs.toArray.map(e => XX.toArray.map(i => cdist(e,i)))\n", " var d =DenseMatrix(dd.map(_.toArray):_*)\n", " var phi = d.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " return phi * w\n", " }\n", " \n", "}\n" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.mllib.linalg.Matrix\nimport org.apache.spark.SparkContext\nimport org.apache.spark.mllib.linalg.distributed.RowMatrix\nimport org.apache.spark.mllib.linalg.{Matrix, Matrices}\nimport scala.util.control.Breaks._\nimport breeze.linalg._\nimport org.apache.spark.ml.feature.PCA\nimport math._\ndefined class GTM\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 6,
"time" : "Took: 5.093s, at 2018-06-22 00:23"
} ]
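}, {
"metadata" : {
"id" : "D8BEFAAC9EBD4F7BCACE0A0F8D9CBDEF"
},
"cell_type" : "markdown",
"source" : "A possible usage sketch for the `GTM` class above, following its constructor signature `(X, n_components, max_iter, tol, verbose)`; the hyperparameter values here are illustrative, not tuned:"
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "E9CFABBD0FCE4A8CDBDF1B1A9EADCEFA"
},
"cell_type" : "code",
"source" : [ "// Sketch only: fit the GTM on the toy data X built earlier in this notebook.\n", "val gtm = new GTM(X, n_components = 2, max_iter = 10, tol = 1e-3, verbose = true)\n", "gtm.fit(X)\n", "val latent = gtm.transform(X)        // posterior-mean latent coordinates\n", "val back = gtm.inverse_transform(X)  // map back through the RBF basis" ],
"outputs" : [ ]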
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "3BA3496778AD4A33AFEA8ADA271A1DF8"
},
"cell_type" : "code",
"source" : [ "" ],
"outputs" : [ {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 4,
"time" : "Took: 2.983s, at 2018-06-22 00:23"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "B693A823098042EAA40397930B659A12"
},
"cell_type" : "code",
"source" : [ "" ],
"outputs" : [ {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 5,
"time" : "Took: 2.101s, at 2018-06-22 00:23"
} ]
} ],
"nbformat" : 4
}
\ No newline at end of file