Commit b848e6cd authored by AREZKI HAFID

last version

parent 8ee1ab3b
@@ -31,7 +31,7 @@
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.SparkContext\nspark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@18ee7055\n"
"text" : "import org.apache.spark.SparkContext\nspark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@394ab6bf\n"
}, {
"metadata" : { },
"data" : {
@@ -39,7 +39,7 @@
},
"output_type" : "execute_result",
"execution_count" : 1,
"time" : "Took: 1.933s, at 2018-06-22 00:23"
"time" : "Took: 2.317s, at 2018-06-22 11:02"
} ]
}, {
"metadata" : {
@@ -53,7 +53,7 @@
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.ml.linalg.Vectors\nr: util.Random.type = scala.util.Random$@298e22d5\nimport Array._\nX: Array[org.apache.spark.ml.linalg.Vector] = Array([0.3992103139732748,0.15931392136077793], [0.1681336055048711,0.232587262081421], [0.5789260963806718,0.5978202204947635], [0.6468902068002914,0.8231631054805284], [0.36276317810451,0.14663951093921612])\ndf: org.apache.spark.sql.DataFrame = [features: vector]\n"
"text" : "import org.apache.spark.ml.linalg.Vectors\nr: util.Random.type = scala.util.Random$@4569ee05\nimport Array._\nX: Array[org.apache.spark.ml.linalg.Vector] = Array([0.8351892492824972,0.346842053786527], [0.46507318688024524,0.7788778379796375], [0.09582206518855885,0.24205346537293704], [0.7516581164876375,0.10844053576979773], [0.651888247236623,0.22178271721602494])\ndf: org.apache.spark.sql.DataFrame = [features: vector]\n"
}, {
"metadata" : { },
"data" : {
@@ -61,7 +61,7 @@
},
"output_type" : "execute_result",
"execution_count" : 2,
"time" : "Took: 6.117s, at 2018-06-22 00:23"
"time" : "Took: 5.432s, at 2018-06-22 11:02"
} ]
}, {
"metadata" : {
@@ -95,7 +95,7 @@
"id" : "64B8AB887DA040A4BA813B8C9C115908"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.mllib.linalg.Matrix\n", "import org.apache.spark.SparkContext\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import scala.util.control.Breaks._\n", "import breeze.linalg._\n", "import org.apache.spark.ml.feature.PCA\n", "import math._\n", "\n", "class GTM (X: Array[org.apache.spark.ml.linalg.Vector],\n", " n_components :Int, max_iter:Int, tol:Double, verbose:Boolean)\n", "{\n", " //val tol=1e-3\n", " //val verbose = false\n", " //val max_iter=10\n", " //val n_components = 2 \n", " var alpha=1e-3\n", " var sigma = 1\n", " var method = \"mean\" \n", " var n_grids = 20 \n", " var n_rbfs = 10 \n", " \n", " def cdist(xs: Double, ys: Double) = {\n", " sqrt(( pow(ys - xs, 2) ))\n", " }\n", "\n", " def get_lattice_points(n_grid:Int): DenseMatrix[Double] ={\n", " var a = range(0,n_components).map(e =>DenseMatrix( linspace(-1, 1, n_grid))).toArray\n", " return DenseMatrix(a.map(_.toArray):_*).t\n", " }\n", " def distance (a :DenseMatrix[Double], b :DenseMatrix[Double]) :DenseMatrix[Double]={\n", " var e0 = a.toArray.map(e => b.toArray.map(i => cdist(e,i)))\n", " var e1 =DenseMatrix(e0.map(_.toArray):_*).toArray\n", " var e2 = e1.take(a.rows * b.rows)\n", " var e3 = DenseMatrix(e2.map(_.toDouble):_*)\n", " return e3.reshape(a.rows , b.rows)\n", " }\n", " var z =get_lattice_points(n_grids)\n", " var rbfs=get_lattice_points(n_rbfs)\n", " var d = distance(z, rbfs)\n", " val phi = d.map({case (t:Double) => (exp(-t)/(2*sigma)) })\n", "\n", " var pca = new PCA()\n", " .setInputCol(\"features\")\n", " .setOutputCol(\"pcaFeatures\")\n", " .setK(2)\n", " .fit(df)\n", " val pc = pca.pc\n", " val pcc = DenseMatrix(pc.toArray.map(_.toDouble):_*)\n", " \n", " var w =pinv(phi) * z * pcc.reshape(z.cols, pcc.rows/z.cols)\n", " val betainv1 = pca.explainedVariance(1)\n", " val w_phi = (phi * w).toArray \n", " val inter_dist1 =(phi * w).toArray.map(x=>(phi * w).toArray.map(y=> cdist(x,y)))\n", " val inter_dist = DenseMatrix(inter_dist1.map(_.toArray):_*) \n", " List.range(0,inter_dist.rows).flatMap(i =>List.range(0,inter_dist.cols)\n", " .map(j=> if (j==i) \n", " inter_dist(i,j) = Double.PositiveInfinity))\n", " \n", " \n", " import breeze.linalg._ \n", " var betainv2 = inter_dist.toArray.reduceLeft(_ min _)/2\n", " var beta = 1/max(betainv1, betainv2)\n", " \n", " def responsability(X: Array[org.apache.spark.ml.linalg.Vector]):Array[Double]={\n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var p2=(w_phi zip x).map{case (x,y) => cdist(x,y)}.map(e => e * exp(-beta/2) )\n", " var p3= p2.map(e => e /:/ sum(DenseMatrix(p2:_*) , Axis._0))\n", " return DenseMatrix(p3.map(_.toArray):_*).toArray\n", " }\n", " \n", " def likelihood (X: Array[org.apache.spark.ml.linalg.Vector]) :Double=\n", " {\n", " var R = responsability(X) \n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var xd = DenseMatrix(X.map(_.toArray):_*)\n", " var D = xd.cols\n", " var k1 = (D /2) * log(beta / 2* Pi) \n", " var k = (w_phi zip x).map{case (a, b) => cdist(a,b)}.map(e => e * (-beta/2) )\n", " var k2 = DenseMatrix(k.map(_.toDouble):_*) \n", " return sum(DenseMatrix(R.map(_.toDouble):_*) *:* (k2 + k1))\n", " }\n", " \n", " def fit (X: Array[org.apache.spark.ml.linalg.Vector])=\n", " {\n", " range(1 ,max_iter).map(i =>\n", " {\n", " var R = responsability(X) \n", " var xx = DenseMatrix(R.map(_.toDouble):_*) \n", " var G = diag(sum(xx , Axis._1 ))\n", " var XX = DenseMatrix(X.map(_.toArray):_*)\n", "\n", " val 
w = (phi * G * phi.t + DenseMatrix.eye[Double](phi.rows).map( i => i * alpha /beta)) \\ \n", " (phi *xx * XX.reshape(xx.cols, xx.rows))\n", " \n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " val beta1 = X.length / sum((w_phi zip x).map{case (a, b) => cdist(a,b)} * R)\n", " \n", " val likelihood1 = likelihood(X)\n", " var prev_likelihood_ = Double.NegativeInfinity\n", " \n", " val diff = abs(likelihood1 - prev_likelihood_) / XX.rows\n", " prev_likelihood_ = likelihood1\n", "\n", " if (verbose)\n", " print(i+1 ,likelihood1 , diff)\n", " \n", " if (diff < tol)\n", " if (verbose)\n", " print(\"converged\")\n", " break\n", " })\n", " }\n", " \n", " \n", " def transform(X: Array[org.apache.spark.ml.linalg.Vector]) :DenseMatrix[Double]=\n", " {\n", " var R = responsability(X)\n", " var xx = DenseMatrix(R.map(_.toDouble):_*) \n", " \n", "\n", " method.filter(e => e ==\"mean\" ) \n", " return z * xx.t.reshape(z.cols ,xx.rows/z.cols )\n", "\n", " method.filter(e => e ==\"mode\" ) \n", " return z(argmax(responsability(X)), ::).t.toDenseMatrix\n", " }\n", " \n", " def inverse_transform(X: Array[org.apache.spark.ml.linalg.Vector]): DenseMatrix[Double]=\n", " {\n", " var XX = DenseMatrix(X.map(_.toArray):_*)\n", " var dd = rbfs.toArray.map(e => XX.toArray.map(i => cdist(e,i)))\n", " var d =DenseMatrix(dd.map(_.toArray):_*)\n", " var phi = d.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " return phi * w\n", " }\n", " \n", "}\n" ],
"source" : [ "import org.apache.spark.mllib.linalg.Matrix\n", "import org.apache.spark.SparkContext\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import scala.util.control.Breaks._\n", "import breeze.linalg._\n", "import org.apache.spark.ml.feature.PCA\n", "import math._\n", "\n", "class GTM (X: Array[org.apache.spark.ml.linalg.Vector],\n", " n_components :Int, max_iter:Int, tol:Double, verbose:Boolean)\n", "{\n", " //val tol=1e-3\n", " //val verbose = false\n", " //val max_iter=10\n", " //val n_components = 2 \n", " var alpha=1e-3\n", " var sigma = 1\n", " var method = \"mean\" \n", " var n_grids = 20 \n", " var n_rbfs = 10 \n", " var prev_likelihood_ = Double.NegativeInfinity\n", "\n", " \n", " def cdist(xs: Double, ys: Double) = {\n", " sqrt(( pow(ys - xs, 2) ))\n", " }\n", "\n", " def get_lattice_points(n_grid:Int): DenseMatrix[Double] ={\n", " var a = range(0,n_components).map(e =>DenseMatrix( linspace(-1, 1, n_grid))).toArray\n", " return DenseMatrix(a.map(_.toArray):_*).t\n", " }\n", " def distance (a :DenseMatrix[Double], b :DenseMatrix[Double]) :DenseMatrix[Double]={\n", " var e0 = a.toArray.map(e => b.toArray.map(i => cdist(e,i)))\n", " var e1 =DenseMatrix(e0.map(_.toArray):_*).toArray\n", " var e2 = e1.take(a.rows * b.rows)\n", " var e3 = DenseMatrix(e2.map(_.toDouble):_*)\n", " return e3.reshape(a.rows , b.rows)\n", " }\n", " var z =get_lattice_points(n_grids)\n", " var rbfs=get_lattice_points(n_rbfs)\n", " var d = distance(z, rbfs)\n", " val phi = d.map({case (t:Double) => (exp(-t)/(2*sigma)) })\n", "\n", " var pca = new PCA()\n", " .setInputCol(\"features\")\n", " .setOutputCol(\"pcaFeatures\")\n", " .setK(2)\n", " .fit(df)\n", " val pc = pca.pc\n", " val pcc = DenseMatrix(pc.toArray.map(_.toDouble):_*)\n", " \n", " var w =pinv(phi) * z * pcc.reshape(z.cols, pcc.rows/z.cols)\n", " val betainv1 = pca.explainedVariance(1)\n", " val w_phi = (phi * w).toArray \n", " val inter_dist1 =(phi * w).toArray.map(x=>(phi * w).toArray.map(y=> cdist(x,y)))\n", " val inter_dist = DenseMatrix(inter_dist1.map(_.toArray):_*) \n", " List.range(0,inter_dist.rows).flatMap(i =>List.range(0,inter_dist.cols)\n", " .map(j=> if (j==i) \n", " inter_dist(i,j) = Double.PositiveInfinity))\n", " \n", " \n", " import breeze.linalg._ \n", " var betainv2 = inter_dist.toArray.reduceLeft(_ min _)/2\n", " var beta = 1/max(betainv1, betainv2)\n", " \n", " def responsability(X: Array[org.apache.spark.ml.linalg.Vector]):Array[Double]={\n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var p2=(w_phi zip x).map{case (x,y) => cdist(x,y)}.map(e => e * exp(-beta/2) )\n", " var p3= p2.map(e => e /:/ sum(DenseMatrix(p2:_*) , Axis._0))\n", " return DenseMatrix(p3.map(_.toArray):_*).toArray\n", " }\n", " \n", " def likelihood (X: Array[org.apache.spark.ml.linalg.Vector]) :Double=\n", " {\n", " var R = responsability(X) \n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var xd = DenseMatrix(X.map(_.toArray):_*)\n", " var D = xd.cols\n", " var k1 = (D /2) * log(beta / 2* Pi) \n", " var k = (w_phi zip x).map{case (a, b) => cdist(a,b)}.map(e => e * (-beta/2) )\n", " var k2 = DenseMatrix(k.map(_.toDouble):_*) \n", " return sum(DenseMatrix(R.map(_.toDouble):_*) *:* (k2 + k1))\n", " }\n", " \n", " def fit (X: Array[org.apache.spark.ml.linalg.Vector])=\n", " {\n", " range(0 ,max_iter).foreach(i =>\n", " {\n", " var R = responsability(X) \n", " var xx = DenseMatrix(R.map(_.toDouble):_*) \n", " var G = diag(sum(xx , Axis._1 
))\n", " var XX = DenseMatrix(X.map(_.toArray):_*)\n", "\n", " var w = (phi * G * phi.t + DenseMatrix.eye[Double](phi.rows).map( i => i * alpha /beta)) \\ \n", " (phi *xx * XX.reshape(xx.cols, xx.rows))\n", " \n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var beta1 = X.length / sum((w_phi zip x).map{case (a, b) => cdist(a,b)} * R)\n", " \n", " var likelihood1 = likelihood(X)\n", " \n", " var diff = abs(likelihood1 - prev_likelihood_) / XX.rows\n", " prev_likelihood_ = likelihood1\n", "\n", " if (verbose)\n", " println(i+1 ,likelihood1 , diff)\n", " \n", " if (diff < tol)\n", " if (verbose)\n", " println(\"converged\")\n", " //break\n", " })\n", " }\n", " \n", " \n", " def transform(X: Array[org.apache.spark.ml.linalg.Vector]) :DenseMatrix[Double]=\n", " {\n", " var R = responsability(X)\n", " var xx = DenseMatrix(R.map(_.toDouble):_*) \n", " \n", "\n", " method.filter(e => e ==\"mean\" ) \n", " return z * xx.t.reshape(z.cols ,xx.rows/z.cols )\n", "\n", " method.filter(e => e ==\"mode\" ) \n", " return z(argmax(responsability(X)), ::).t.toDenseMatrix\n", " }\n", " \n", " def inverse_transform(X: Array[org.apache.spark.ml.linalg.Vector]): DenseMatrix[Double]=\n", " {\n", " var XX = DenseMatrix(X.map(_.toArray):_*)\n", " var dd = rbfs.toArray.map(e => XX.toArray.map(i => cdist(e,i)))\n", " var d =DenseMatrix(dd.map(_.toArray):_*)\n", " var phi = d.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " return phi * w\n", " }\n", " \n", "}\n" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
@@ -106,8 +106,8 @@
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 6,
"time" : "Took: 5.093s, at 2018-06-22 00:23"
"execution_count" : 77,
"time" : "Took: 8.010s, at 2018-06-22 12:58"
} ]
}, {
"metadata" : {
@@ -117,34 +117,56 @@
"id" : "3BA3496778AD4A33AFEA8ADA271A1DF8"
},
"cell_type" : "code",
"source" : [ "" ],
"source" : [ "object MyGtm extends GTM(X: Array[org.apache.spark.ml.linalg.Vector],\n", " n_components =2, max_iter=50, tol=1e-2, verbose=true) " ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "defined object MyGtm\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 4,
"time" : "Took: 2.983s, at 2018-06-22 00:23"
"execution_count" : 80,
"time" : "Took: 6.054s, at 2018-06-22 13:11"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "B693A823098042EAA40397930B659A12"
"presentation" : {
"tabs_state" : "{\n \"tab_id\": \"#tab1256557178-0\"\n}",
"pivot_chart_state" : "{\n \"hiddenAttributes\": [],\n \"menuLimit\": 200,\n \"cols\": [],\n \"rows\": [],\n \"vals\": [],\n \"exclusions\": {},\n \"inclusions\": {},\n \"unusedAttrsVertical\": 85,\n \"autoSortUnusedAttrs\": false,\n \"inclusionsInfo\": {},\n \"aggregatorName\": \"Count\",\n \"rendererName\": \"Table\"\n}"
},
"id" : "ED85951E4A0840AB9A73714FE5B762DD"
},
"cell_type" : "code",
"source" : [ "" ],
"source" : [ "MyGtm.fit(X)" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "(1,-19.79612687824985,Infinity)\n(2,-19.79612687824985,0.0)\nconverged\n(3,-19.79612687824985,0.0)\nconverged\n(4,-19.79612687824985,0.0)\nconverged\n(5,-19.79612687824985,0.0)\nconverged\n(6,-19.79612687824985,0.0)\nconverged\n(7,-19.79612687824985,0.0)\nconverged\n(8,-19.79612687824985,0.0)\nconverged\n(9,-19.79612687824985,0.0)\nconverged\n(10,-19.79612687824985,0.0)\nconverged\n(11,-19.79612687824985,0.0)\nconverged\n(12,-19.79612687824985,0.0)\nconverged\n(13,-19.79612687824985,0.0)\nconverged\n(14,-19.79612687824985,0.0)\nconverged\n(15,-19.79612687824985,0.0)\nconverged\n(16,-19.79612687824985,0.0)\nconverged\n(17,-19.79612687824985,0.0)\nconverged\n(18,-19.79612687824985,0.0)\nconverged\n(19,-19.79612687824985,0.0)\nconverged\n(20,-19.79612687824985,0.0)\nconverged\n(21,-19.79612687824985,0.0)\nconverged\n(22,-19.79612687824985,0.0)\nconverged\n(23,-19.79612687824985,0.0)\nconverged\n(24,-19.79612687824985,0.0)\nconverged\n(25,-19.79612687824985,0.0)\nconverged\n(26,-19.79612687824985,0.0)\nconverged\n(27,-19.79612687824985,0.0)\nconverged\n(28,-19.79612687824985,0.0)\nconverged\n(29,-19.79612687824985,0.0)\nconverged\n(30,-19.79612687824985,0.0)\nconverged\n(31,-19.79612687824985,0.0)\nconverged\n(32,-19.79612687824985,0.0)\nconverged\n(33,-19.79612687824985,0.0)\nconverged\n(34,-19.79612687824985,0.0)\nconverged\n(35,-19.79612687824985,0.0)\nconverged\n(36,-19.79612687824985,0.0)\nconverged\n(37,-19.79612687824985,0.0)\nconverged\n(38,-19.79612687824985,0.0)\nconverged\n(39,-19.79612687824985,0.0)\nconverged\n(40,-19.79612687824985,0.0)\nconverged\n(41,-19.79612687824985,0.0)\nconverged\n(42,-19.79612687824985,0.0)\nconverged\n(43,-19.79612687824985,0.0)\nconverged\n(44,-19.79612687824985,0.0)\nconverged\n(45,-19.79612687824985,0.0)\nconverged\n(46,-19.79612687824985,0.0)\nconverged\n(47,-19.79612687824985,0.0)\nconverged\n(48,-19.79612687824985,0.0)\nconverged\n(49,-19.79612687824985,0.0)\nconverged\n(50,-19.79612687824985,0.0)\nconverged\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 5,
"time" : "Took: 2.101s, at 2018-06-22 00:23"
"execution_count" : 81,
"time" : "Took: 8.402s, at 2018-06-22 13:11"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : true,
"id" : "D0D50B030E8D4441A782E4D9B65EA0D4"
},
"cell_type" : "code",
"source" : [ "" ],
"outputs" : [ ]
} ],
"nbformat" : 4
}
\ No newline at end of file
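
For context, here is a minimal usage sketch of the GTM class this commit adds, assuming the notebook's SparkSession `spark` is in scope. The data-cell source is not part of this diff (only its output is), so the construction of `X` and `df` below is an assumption that mirrors that output; the MyGtm lines simply restate the cells shown in the diff.

// Minimal sketch, not the notebook's verbatim code: assumes a SparkSession
// named `spark` and the GTM class from the cell above are already defined.
import scala.util.Random
import org.apache.spark.ml.linalg.{Vector, Vectors}

// Five random 2-D points, as in the earlier data cell's output.
val X: Array[Vector] = Array.fill(5)(Vectors.dense(Random.nextDouble(), Random.nextDouble()))

// The GTM class reads a DataFrame `df` with a "features" column for its PCA
// initialisation, so `df` must exist in the notebook scope before the class
// cell compiles; this is one possible way to build it (assumed, not shown in the diff).
val df = spark.createDataFrame(X.map(Tuple1.apply)).toDF("features")

// Instantiate and fit, as the MyGtm cells in this diff do.
object MyGtm extends GTM(X, n_components = 2, max_iter = 50, tol = 1e-2, verbose = true)
MyGtm.fit(X)
val latent = MyGtm.transform(X)  // latent-space coordinates as a Breeze DenseMatrix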