Commit 4adbb2f1 authored by AREZKI HAFID

third version

parent d0a9f70f
{
"metadata" : {
"id" : "2f852a9a-018b-4dd9-8c45-f87021dad78a",
"name" : "class_clean.snb.ipynb",
"user_save_timestamp" : "2018-05-22T15:01:51.417Z",
"auto_save_timestamp" : "1970-01-01T01:00:00.000Z",
"language_info" : {
"name" : "scala",
"file_extension" : "scala",
"codemirror_mode" : "text/x-scala"
},
"trusted" : true,
"sparkNotebook" : null,
"customLocalRepo" : null,
"customRepos" : null,
"customDeps" : null,
"customImports" : null,
"customArgs" : null,
"customSparkConf" : null,
"customVars" : null
},
"cells" : [ {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "84A72D1FD7EB49888C7DF4242EB01AED"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.SparkContext\n", "\n", "val spark = SparkSession \n", ".builder \n", ".appName (\"WorldBankIndex\") \n", ".getOrCreate ()\n" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.SparkContext\nspark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@71304681\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 17,
"time" : "Took: 2.233s, at 2018-05-29 14:47"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "BBE39DAF284C4BAD809FCD5DA69084A1"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.ml.feature.PCA\n", "import org.apache.spark.mllib.linalg.Matrix\n", "import org.apache.spark.mllib.linalg.Vectors\n", "import org.apache.spark.SparkContext\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import scala.util.control.Breaks._\n", "\n", "\n", "\n", "import math._\n", "import Array._\n", "\n", "class GTM\n", "{\n", " val tol=1e-3\n", " val verbose = false\n", " val max_iter=10\n", " val alpha=1e-3\n", " val sigma = 1\n", " val method = \"mean\" \n", " \n", " def distance(xs: Array[Double], ys: Array[Double]) = {\n", " sqrt((xs zip ys).map { case (x,y) => pow(y - x, 2) }.sum)\n", " } \n", "\n", " def cdist(xs: Double, ys: Double) = {\n", " sqrt(( pow(ys - xs, 2) ))\n", " } \n", "\n", " def get_lattice_points(n_grid:Int): Array[Array[Double]] ={\n", " val r = scala.util.Random\n", " val mat= Array.range(0,n_grid).map(i => Array.range(0,3).map(j =>r.nextDouble() ))\n", " return mat.transpose \n", " } \n", "\n", "//def init()\n", "\n", "//generate map \n", " val z = get_lattice_points(3)\n", " val rbfs = get_lattice_points(2)\n", " val dd = z.flatMap(x =>rbfs.map(y => {distance(x,y)}))\n", " val phi = dd.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " val r = scala.util.Random\n", "\n", "//init W and beta from PCA\n", " val data = Array(\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())),\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())),\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())) )\n", "\n", " val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF(\"features\")\n", "\n", " val pca = new PCA()\n", " .setInputCol(\"features\")\n", " .setOutputCol(\"pcaFeatures\")\n", " .setK(3)\n", " .fit(df)\n", "\n", "\n", " import breeze.linalg._\n", "\n", " val pcc = pca.pc\n", " val z2 = DenseMatrix(z.map(_.toArray):_*) \n", " val phi2 = DenseMatrix(phi.map(_.toDouble):_*)\n", " val phinv = pinv(phi2)\n", " val w = z2.toArray * phinv.toArray * pcc.toArray.patch(9, Nil, 1)\n", "\n", " val betainv1 = pca.explainedVariance(2)\n", " val dot1 =phi * w\n", " val dot2 =phi * w\n", " val inter_dist1 =dot1.map(x=>dot2.map(y=> cdist(x,y)))\n", " val inter_dist = DenseMatrix(inter_dist1.map(_.toArray):_*) \n", "//diagonal\n", " List.range(0,inter_dist.rows).flatMap(i =>List.range(0,inter_dist.cols)\n", " .map(j=> if (j==i) \n", " inter_dist(i,j) = Double.PositiveInfinity))\n", " \n", " val betainv2 = inter_dist.toArray.reduceLeft(_ min _)/2\n", " val beta = 1 / max(betainv1, betainv2) \n", "\n", " val p1 = exp(-beta/2) \n", " val temp1 = DenseMatrix(rbfs.map(_.toArray):_*) \n", " val temp2 = temp1.toArray\n", " val p2 = dot1.map(x =>temp2.map(y => {cdist(x,y)}))\n", "\n", " val temp3 = DenseMatrix(p2.map(_.toArray):_*) \n", " \n", " val temp5 = DenseMatrix(z.map(_.toArray):_*) \n", "\n", "\n", " def responsability():breeze.linalg.DenseMatrix[Double]=\n", " { \n", " val pp2 = temp3.toArray\n", " val p = pp2.map(x => x * p1)\n", "\n", " val p3 = DenseMatrix(p:_*) \n", " val somme = sum(p3 ,Axis._0 )\n", " \n", " val re = p3.map( j => j /:/ somme)\n", " val re1 = re.toArray \n", " val t3 = DenseMatrix(re1.map(_.toArray):_*) \n", " return t3.reshape(27,2)\n", " \n", " }\n", "\n", "def likelihood () :Double=\n", " {\n", " val R = responsability() \n", " val D = temp1.cols\n", " val k1 = (D /2) * log(beta / 2* Pi) \n", " val k2 = temp3.map(i => i * (-beta /2)) \n", " return sum(R.reshape(6,9) * (k2 +k1))\n", " }\n", " \n", "def fit() \n", " 
{\n", "\n", "\n", "range(1 ,max_iter).foreach(i =>\n", "{ \n", " val R = responsability() \n", " val G = diag(sum(R , Axis._1 ))\n", "\n", " val A1 = G.reshape(81, 9) * phi2 * phi2.t\n", "\n", " val a1 = DenseMatrix.eye[Double](27)\n", " val ze = alpha /beta\n", " val a2 =a1.map( i => i * ze) \n", " val A2 = A1.reshape(27,27) + a2\n", "\n", " val temp4 =phi2.t.reshape(3,3)\n", " val A3 = temp4 * temp5 * R.reshape(3, 18)\n", " val W = A2.reshape(3,243) \\ A3 //Solve (A2, A3)\n", " val Beta = temp1.toArray.length / sum(temp3 * R.reshape(6,9))\n", " \n", " \n", " val likelihood1 = likelihood()\n", " var prev_likelihood_ = Double.NegativeInfinity\n", " val diff = abs(likelihood1 - prev_likelihood_) / temp1.rows\n", " prev_likelihood_ = likelihood1\n", " \n", " if (verbose)\n", " print(i+1, likelihood , diff)\n", " \n", " if (diff < tol)\n", " if (verbose)\n", " print(\"converged\")\n", " break\n", " })\n", " }\n", " \n", "def transform() :DenseMatrix[Double]=\n", " {\n", " assert (method == \"mean\") \n", " val R = responsability()\n", " \n", " if (method == \"mean\") (temp5.t * R.reshape(3, 18)).t \n", " else \n", " temp1(argmax(argmax(responsability(), Axis._0)),::).t.toDenseMatrix \n", "}\n", " \n", " \n", "def inverse_transform(): Array[Double]=\n", " {\n", " val d = z.transpose.flatMap(i => rbfs.map(j=> distance(i, j)))\n", " val phi = d.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " return w * phi\n", " }\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "}" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.ml.feature.PCA\nimport org.apache.spark.mllib.linalg.Matrix\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.SparkContext\nimport org.apache.spark.mllib.linalg.distributed.RowMatrix\nimport org.apache.spark.mllib.linalg.{Matrix, Matrices}\nimport scala.util.control.Breaks._\nimport math._\nimport Array._\ndefined class GTM\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 18,
"time" : "Took: 6.207s, at 2018-05-29 14:47"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : true,
"id" : "9451B261D2494585AE2ACA652A554E1C"
},
"cell_type" : "code",
"source" : [ "" ],
"outputs" : [ ]
} ],
"nbformat" : 4
}
\ No newline at end of file
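Note on the cell above: the first version of the GTM class draws its latent grid and RBF centres at random in `get_lattice_points` and then builds the basis values `phi` from pairwise distances. For comparison, below is a minimal, hedged Breeze sketch of that same step using a regular grid and the standard Gaussian RBF expression; the names `latticePoints` and `rbfPhi` are illustrative only and are not part of the notebook, and the RBF formula shown is the usual exp(-d^2 / (2 sigma^2)), which differs from the cell's `exp(-t)/2*sigma`.

```scala
// Hedged sketch (not the notebook's exact code): latent lattice + Gaussian RBF basis with Breeze.
import breeze.linalg.{DenseMatrix, linspace, norm}

// n points on a regular 1-D grid in [-1, 1]; the final notebook below switches to linspace like this,
// while the cell above samples random points instead.
def latticePoints(n: Int): DenseMatrix[Double] =
  linspace(-1.0, 1.0, n).toDenseMatrix.t            // n x 1 matrix

// Standard Gaussian RBF design matrix: phi(i, j) = exp(-||z_i - mu_j||^2 / (2 * sigma^2)).
def rbfPhi(z: DenseMatrix[Double], mu: DenseMatrix[Double], sigma: Double): DenseMatrix[Double] =
  DenseMatrix.tabulate(z.rows, mu.rows) { (i, j) =>
    val d = norm(z(i, ::).t - mu(j, ::).t)
    math.exp(-d * d / (2.0 * sigma * sigma))
  }

val zGrid = latticePoints(9)       // latent grid
val mu    = latticePoints(3)       // RBF centres
val phi   = rbfPhi(zGrid, mu, sigma = 1.0)
```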
{
"metadata" : {
"id" : "2f852a9a-018b-4dd9-8c45-f87021dad78a",
"name" : "class_clean.snb.ipynb",
"user_save_timestamp" : "2018-05-22T15:01:51.417Z",
"auto_save_timestamp" : "1970-01-01T01:00:00.000Z",
"language_info" : {
"name" : "scala",
"file_extension" : "scala",
"codemirror_mode" : "text/x-scala"
},
"trusted" : true,
"sparkNotebook" : null,
"customLocalRepo" : null,
"customRepos" : null,
"customDeps" : null,
"customImports" : null,
"customArgs" : null,
"customSparkConf" : null,
"customVars" : null
},
"cells" : [ {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "84A72D1FD7EB49888C7DF4242EB01AED"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.SparkContext\n", "\n", "val spark = SparkSession \n", ".builder \n", ".appName (\"WorldBankIndex\") \n", ".getOrCreate ()\n" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.SparkContext\nspark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@5b49a2ec\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 3,
"time" : "Took: 1.797s, at 2018-05-30 19:33"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "BBE39DAF284C4BAD809FCD5DA69084A1"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.ml.feature.PCA\n", "import org.apache.spark.mllib.linalg.Matrix\n", "import org.apache.spark.mllib.linalg.Vectors\n", "import org.apache.spark.SparkContext\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import scala.util.control.Breaks._\n", "\n", "\n", "\n", "import math._\n", "import Array._\n", "\n", "class GTM\n", "{\n", " val tol=1e-3\n", " val verbose = false\n", " val max_iter=10\n", " val alpha=1e-3\n", " val sigma = 1\n", " val method = \"mean\" \n", " \n", " def distance(xs: Array[Double], ys: Array[Double]) = {\n", " sqrt((xs zip ys).map { case (x,y) => pow(y - x, 2) }.sum)\n", " } \n", "\n", " def cdist(xs: Double, ys: Double) = {\n", " sqrt(( pow(ys - xs, 2) ))\n", " } \n", "\n", " def get_lattice_points(n_grid:Int): Array[Array[Double]] ={\n", " val r = scala.util.Random\n", " val mat= Array.range(0,n_grid).map(i => Array.range(0,3).map(j =>r.nextDouble() ))\n", " return mat.transpose \n", " } \n", "\n", "//def init()\n", "\n", "//generate map \n", " val z = get_lattice_points(3)\n", " val rbfs = get_lattice_points(2)\n", " val dd = z.flatMap(x =>rbfs.map(y => {distance(x,y)}))\n", " val phi = dd.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " val r = scala.util.Random\n", "\n", "//init W and beta from PCA\n", " val data = Array(\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())),\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())),\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())) )\n", "\n", " val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF(\"features\")\n", "\n", " val pca = new PCA()\n", " .setInputCol(\"features\")\n", " .setOutputCol(\"pcaFeatures\")\n", " .setK(3)\n", " .fit(df)\n", "\n", "\n", " import breeze.linalg._\n", "\n", " val pcc = pca.pc\n", " val z2 = DenseMatrix(z.map(_.toArray):_*) \n", " val phi2 = DenseMatrix(phi.map(_.toDouble):_*)\n", " val phinv = pinv(phi2)\n", " val w = z2.toArray * phinv.toArray * pcc.toArray.patch(9, Nil, 1)\n", "\n", " val betainv1 = pca.explainedVariance(2)\n", " val dot1 =phi * w\n", " val dot2 =phi * w\n", " val inter_dist1 =dot1.map(x=>dot2.map(y=> cdist(x,y)))\n", " val inter_dist = DenseMatrix(inter_dist1.map(_.toArray):_*) \n", "//diagonal\n", " List.range(0,inter_dist.rows).flatMap(i =>List.range(0,inter_dist.cols)\n", " .map(j=> if (j==i) \n", " inter_dist(i,j) = Double.PositiveInfinity))\n", " \n", " val betainv2 = inter_dist.toArray.reduceLeft(_ min _)/2\n", " val beta = 1 / max(betainv1, betainv2) \n", "\n", " val p1 = exp(-beta/2) \n", " val temp1 = DenseMatrix(rbfs.map(_.toArray):_*) \n", " val temp2 = temp1.toArray\n", " val p2 = dot1.map(x =>temp2.map(y => {cdist(x,y)}))\n", "\n", " val temp3 = DenseMatrix(p2.map(_.toArray):_*) \n", " \n", " val temp5 = DenseMatrix(z.map(_.toArray):_*) \n", "\n", "\n", " def responsability():breeze.linalg.DenseMatrix[Double]=\n", " { \n", " val pp2 = temp3.toArray\n", " val p = pp2.map(x => x * p1)\n", "\n", " val p3 = DenseMatrix(p:_*) \n", " val somme = sum(p3 ,Axis._0 )\n", " \n", " val re = p3.map( j => j /:/ somme)\n", " val re1 = re.toArray \n", " val t3 = DenseMatrix(re1.map(_.toArray):_*) \n", " return t3.reshape(27,2)\n", " \n", " }\n", "\n", "def likelihood () :Double=\n", " {\n", " val R = responsability() \n", " val D = temp1.cols\n", " val k1 = (D /2) * log(beta / 2* Pi) \n", " val k2 = temp3.map(i => i * (-beta /2)) \n", " return sum(R.reshape(6,9) * (k2 +k1))\n", " }\n", " \n", "def fit() \n", " 
{\n", "\n", "\n", "range(1 ,max_iter).foreach(i =>\n", "{ \n", " val R = responsability() \n", " val G = diag(sum(R , Axis._1 ))\n", "\n", " val A1 = G.reshape(81, 9) * phi2 * phi2.t\n", "\n", " val a1 = DenseMatrix.eye[Double](27)\n", " val ze = alpha /beta\n", " val a2 =a1.map( i => i * ze) \n", " val A2 = A1.reshape(27,27) + a2\n", "\n", " val temp4 =phi2.t.reshape(3,3)\n", " val A3 = temp4 * temp5 * R.reshape(3, 18)\n", " val W = A2.reshape(3,243) \\ A3 //Solve (A2, A3)\n", " val Beta = temp1.toArray.length / sum(temp3 * R.reshape(6,9))\n", " \n", " \n", " val likelihood1 = likelihood()\n", " var prev_likelihood_ = Double.NegativeInfinity\n", " val diff = abs(likelihood1 - prev_likelihood_) / temp1.rows\n", " prev_likelihood_ = likelihood1\n", " \n", " if (verbose)\n", " print(i+1, likelihood , diff)\n", " \n", " if (diff < tol)\n", " if (verbose)\n", " print(\"converged\")\n", " break\n", " })\n", " }\n", " \n", "def transform() :DenseMatrix[Double]=\n", " {\n", " assert (method == \"mean\") \n", " val R = responsability()\n", " \n", " \n", " val a3= method.filter(e => e==\"mean\" ) \n", " (temp5.t * R.reshape(3, 18)).t \n", " \n", " val a2= method.filter(e => e==\"mode\" ) \n", " temp1(argmax(argmax(responsability(), Axis._0)),::).t.toDenseMatrix \n", "}\n", " \n", " \n", "def inverse_transform(): Array[Double]=\n", " {\n", " val d = z.transpose.flatMap(i => rbfs.map(j=> distance(i, j)))\n", " val phi = d.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " return w * phi\n", " }\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "}" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.ml.feature.PCA\nimport org.apache.spark.mllib.linalg.Matrix\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.SparkContext\nimport org.apache.spark.mllib.linalg.distributed.RowMatrix\nimport org.apache.spark.mllib.linalg.{Matrix, Matrices}\nimport scala.util.control.Breaks._\nimport math._\nimport Array._\ndefined class GTM\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 10,
"time" : "Took: 4.334s, at 2018-05-30 19:40"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : true,
"id" : "9451B261D2494585AE2ACA652A554E1C"
},
"cell_type" : "code",
"source" : [ "" ],
"outputs" : [ ]
} ],
"nbformat" : 4
}
\ No newline at end of file
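Note on the cell above: `responsability()` scales the distance matrix by `exp(-beta/2)` and normalises column-wise. For reference, a hedged Breeze-only sketch of the usual GTM posterior, R(k, n) proportional to exp(-beta/2 * ||y_k - t_n||^2) normalised over the lattice points for each data point, could look like the following; the identifiers (`responsibilities`, `y`, `t`) are illustrative and not the notebook's.

```scala
// Hedged sketch of the GTM responsibility (posterior) computation with Breeze only.
import breeze.linalg.{DenseMatrix, Axis, sum}

def responsibilities(y: DenseMatrix[Double],   // K x D mapped lattice points (phi * W)
                     t: DenseMatrix[Double],   // N x D data points
                     beta: Double): DenseMatrix[Double] = {
  // Unnormalised K x N matrix: exp(-beta/2 * squared distance between y_k and t_n).
  val p = DenseMatrix.tabulate(y.rows, t.rows) { (k, n) =>
    val d = y(k, ::).t - t(n, ::).t
    math.exp(-0.5 * beta * (d dot d))
  }
  // Normalise each column so the K responsibilities of a data point sum to 1.
  val colSums = sum(p, Axis._0).t
  DenseMatrix.tabulate(y.rows, t.rows)((k, n) => p(k, n) / colSums(n))
}
```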
{
"metadata" : {
"id" : "bc2cd7fd-9fc8-431b-8a04-ec15af824c67",
"name" : "final_lipn",
"user_save_timestamp" : "1970-01-01T01:00:00.000Z",
"auto_save_timestamp" : "1970-01-01T01:00:00.000Z",
"language_info" : {
"name" : "scala",
"file_extension" : "scala",
"codemirror_mode" : "text/x-scala"
},
"trusted" : true,
"sparkNotebook" : null,
"customLocalRepo" : null,
"customRepos" : null,
"customDeps" : null,
"customImports" : null,
"customArgs" : null,
"customSparkConf" : null,
"customVars" : null
},
"cells" : [ {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "E6FF57A50C0E45758ABE2D68933A2DAB"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.SparkContext\n", "\n", "val spark = SparkSession \n", ".builder \n", ".appName (\"WorldBankIndex\") \n", ".getOrCreate ()" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.SparkContext\nspark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@18ee7055\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 1,
"time" : "Took: 1.933s, at 2018-06-22 00:23"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "38E75D0E9A91489C83BBE430B8F579EF"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.ml.linalg.Vectors\n", "val r = scala.util.Random\n", "import Array._\n", "\n", "val X = (\n", " range(0,5).map(i => Vectors.dense(Array.range(0,2).map(i => r.nextDouble()))))\n", " val df = spark.createDataFrame(X.map(Tuple1.apply)).toDF(\"features\")" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.ml.linalg.Vectors\nr: util.Random.type = scala.util.Random$@298e22d5\nimport Array._\nX: Array[org.apache.spark.ml.linalg.Vector] = Array([0.3992103139732748,0.15931392136077793], [0.1681336055048711,0.232587262081421], [0.5789260963806718,0.5978202204947635], [0.6468902068002914,0.8231631054805284], [0.36276317810451,0.14663951093921612])\ndf: org.apache.spark.sql.DataFrame = [features: vector]\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 2,
"time" : "Took: 6.117s, at 2018-06-22 00:23"
} ]
}, {
"metadata" : {
"id" : "84A20896670C4C5B8AE4400861271E55"
},
"cell_type" : "markdown",
"source" : "***responsability***\n\n$p(\\textbf{t|x}, \\textbf{W},\\beta) = (\\frac{\\beta}{2\\pi})^\\frac{D}{2} \\exp \\left\\{ \n \\frac{- \\beta}{2}\n\\mid y(x; \\textbf{W})-t \\mid ^2 \\right\\} $"
}, {
"metadata" : {
"id" : "E3F138364D0D42488CF4B0BA8D30A8C3"
},
"cell_type" : "markdown",
"source" : "***Likelihood***\n\n$\\mathfrak L (\\textbf{W}, \\beta)= \\ln \\prod\\limits_{\\substack{n=1}}^{N} p(t_n|W, \\beta) $"
}, {
"metadata" : {
"id" : "7C190FB0249F468395BD83082782801C"
},
"cell_type" : "markdown",
"source" : "***transform***\n\n$ (x| t_n, W^* , \\beta ^*) = \\int p(x|t_n, W^*,\\beta ^*)x dx $\n\n$= \\sum_{i=1}^{K} R_{in} x_i$"
}, {
"metadata" : {
"id" : "ACCC0375B1034ED580389E0BD14B5040"
},
"cell_type" : "markdown",
"source" : "***inverse_transform***\n\n$ E = 1/2 \\sum_{i} \\left \\| W \\varnothing (x_{i}) - Ux_I\\right \\| $"
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "64B8AB887DA040A4BA813B8C9C115908"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.mllib.linalg.Matrix\n", "import org.apache.spark.SparkContext\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import scala.util.control.Breaks._\n", "import breeze.linalg._\n", "import org.apache.spark.ml.feature.PCA\n", "import math._\n", "\n", "class GTM (X: Array[org.apache.spark.ml.linalg.Vector],\n", " n_components :Int, max_iter:Int, tol:Double, verbose:Boolean)\n", "{\n", " //val tol=1e-3\n", " //val verbose = false\n", " //val max_iter=10\n", " //val n_components = 2 \n", " var alpha=1e-3\n", " var sigma = 1\n", " var method = \"mean\" \n", " var n_grids = 20 \n", " var n_rbfs = 10 \n", " \n", " def cdist(xs: Double, ys: Double) = {\n", " sqrt(( pow(ys - xs, 2) ))\n", " }\n", "\n", " def get_lattice_points(n_grid:Int): DenseMatrix[Double] ={\n", " var a = range(0,n_components).map(e =>DenseMatrix( linspace(-1, 1, n_grid))).toArray\n", " return DenseMatrix(a.map(_.toArray):_*).t\n", " }\n", " def distance (a :DenseMatrix[Double], b :DenseMatrix[Double]) :DenseMatrix[Double]={\n", " var e0 = a.toArray.map(e => b.toArray.map(i => cdist(e,i)))\n", " var e1 =DenseMatrix(e0.map(_.toArray):_*).toArray\n", " var e2 = e1.take(a.rows * b.rows)\n", " var e3 = DenseMatrix(e2.map(_.toDouble):_*)\n", " return e3.reshape(a.rows , b.rows)\n", " }\n", " var z =get_lattice_points(n_grids)\n", " var rbfs=get_lattice_points(n_rbfs)\n", " var d = distance(z, rbfs)\n", " val phi = d.map({case (t:Double) => (exp(-t)/(2*sigma)) })\n", "\n", " var pca = new PCA()\n", " .setInputCol(\"features\")\n", " .setOutputCol(\"pcaFeatures\")\n", " .setK(2)\n", " .fit(df)\n", " val pc = pca.pc\n", " val pcc = DenseMatrix(pc.toArray.map(_.toDouble):_*)\n", " \n", " var w =pinv(phi) * z * pcc.reshape(z.cols, pcc.rows/z.cols)\n", " val betainv1 = pca.explainedVariance(1)\n", " val w_phi = (phi * w).toArray \n", " val inter_dist1 =(phi * w).toArray.map(x=>(phi * w).toArray.map(y=> cdist(x,y)))\n", " val inter_dist = DenseMatrix(inter_dist1.map(_.toArray):_*) \n", " List.range(0,inter_dist.rows).flatMap(i =>List.range(0,inter_dist.cols)\n", " .map(j=> if (j==i) \n", " inter_dist(i,j) = Double.PositiveInfinity))\n", " \n", " \n", " import breeze.linalg._ \n", " var betainv2 = inter_dist.toArray.reduceLeft(_ min _)/2\n", " var beta = 1/max(betainv1, betainv2)\n", " \n", " def responsability(X: Array[org.apache.spark.ml.linalg.Vector]):Array[Double]={\n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var p2=(w_phi zip x).map{case (x,y) => cdist(x,y)}.map(e => e * exp(-beta/2) )\n", " var p3= p2.map(e => e /:/ sum(DenseMatrix(p2:_*) , Axis._0))\n", " return DenseMatrix(p3.map(_.toArray):_*).toArray\n", " }\n", " \n", " def likelihood (X: Array[org.apache.spark.ml.linalg.Vector]) :Double=\n", " {\n", " var R = responsability(X) \n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " var xd = DenseMatrix(X.map(_.toArray):_*)\n", " var D = xd.cols\n", " var k1 = (D /2) * log(beta / 2* Pi) \n", " var k = (w_phi zip x).map{case (a, b) => cdist(a,b)}.map(e => e * (-beta/2) )\n", " var k2 = DenseMatrix(k.map(_.toDouble):_*) \n", " return sum(DenseMatrix(R.map(_.toDouble):_*) *:* (k2 + k1))\n", " }\n", " \n", " def fit (X: Array[org.apache.spark.ml.linalg.Vector])=\n", " {\n", " range(1 ,max_iter).map(i =>\n", " {\n", " var R = responsability(X) \n", " var xx = DenseMatrix(R.map(_.toDouble):_*) \n", " var G = diag(sum(xx , Axis._1 ))\n", " var XX = DenseMatrix(X.map(_.toArray):_*)\n", "\n", " val 
w = (phi * G * phi.t + DenseMatrix.eye[Double](phi.rows).map( i => i * alpha /beta)) \\ \n", " (phi *xx * XX.reshape(xx.cols, xx.rows))\n", " \n", " var x = DenseMatrix(X.map(_.toArray):_*).toArray \n", " val beta1 = X.length / sum((w_phi zip x).map{case (a, b) => cdist(a,b)} * R)\n", " \n", " val likelihood1 = likelihood(X)\n", " var prev_likelihood_ = Double.NegativeInfinity\n", " \n", " val diff = abs(likelihood1 - prev_likelihood_) / XX.rows\n", " prev_likelihood_ = likelihood1\n", "\n", " if (verbose)\n", " print(i+1 ,likelihood1 , diff)\n", " \n", " if (diff < tol)\n", " if (verbose)\n", " print(\"converged\")\n", " break\n", " })\n", " }\n", " \n", " \n", " def transform(X: Array[org.apache.spark.ml.linalg.Vector]) :DenseMatrix[Double]=\n", " {\n", " var R = responsability(X)\n", " var xx = DenseMatrix(R.map(_.toDouble):_*) \n", " \n", "\n", " method.filter(e => e ==\"mean\" ) \n", " return z * xx.t.reshape(z.cols ,xx.rows/z.cols )\n", "\n", " method.filter(e => e ==\"mode\" ) \n", " return z(argmax(responsability(X)), ::).t.toDenseMatrix\n", " }\n", " \n", " def inverse_transform(X: Array[org.apache.spark.ml.linalg.Vector]): DenseMatrix[Double]=\n", " {\n", " var XX = DenseMatrix(X.map(_.toArray):_*)\n", " var dd = rbfs.toArray.map(e => XX.toArray.map(i => cdist(e,i)))\n", " var d =DenseMatrix(dd.map(_.toArray):_*)\n", " var phi = d.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " return phi * w\n", " }\n", " \n", "}\n" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.mllib.linalg.Matrix\nimport org.apache.spark.SparkContext\nimport org.apache.spark.mllib.linalg.distributed.RowMatrix\nimport org.apache.spark.mllib.linalg.{Matrix, Matrices}\nimport scala.util.control.Breaks._\nimport breeze.linalg._\nimport org.apache.spark.ml.feature.PCA\nimport math._\ndefined class GTM\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 6,
"time" : "Took: 5.093s, at 2018-06-22 00:23"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "3BA3496778AD4A33AFEA8ADA271A1DF8"
},
"cell_type" : "code",
"source" : [ "" ],
"outputs" : [ {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 4,
"time" : "Took: 2.983s, at 2018-06-22 00:23"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "B693A823098042EAA40397930B659A12"
},
"cell_type" : "code",
"source" : [ "" ],
"outputs" : [ {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 5,
"time" : "Took: 2.101s, at 2018-06-22 00:23"
} ]
} ],
"nbformat" : 4
}
\ No newline at end of file
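Note on the final notebook: it defines a parameterised GTM(X, n_components, max_iter, tol, verbose) class whose constructor runs the PCA-based initialisation against the `df` DataFrame created in the earlier cell. Below is a hedged usage sketch, assuming those cells (the `spark` session, `df`, and the class definition) have already been evaluated and taking the class's fit/transform behaviour as written; it is an illustration, not code from the commit.

```scala
// Hedged usage sketch for the GTM class defined in the last notebook above.
import org.apache.spark.ml.linalg.Vectors

val rng = scala.util.Random
// Five random 2-D points, matching the shape of the X built in the second cell.
val X = Array.fill(5)(Vectors.dense(Array.fill(2)(rng.nextDouble())))

val gtm = new GTM(X, n_components = 2, max_iter = 10, tol = 1e-3, verbose = true)
gtm.fit(X)                     // EM-style loop: responsibilities, then W and beta updates
val latent = gtm.transform(X)  // latent-space coordinates (posterior mean when method == "mean")
println(latent)
```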