Commit e66a9792 authored by AREZKI HAFID's avatar AREZKI HAFID
Browse files

premiere version

parents
{
"metadata" : {
"id" : "2f852a9a-018b-4dd9-8c45-f87021dad78a",
"name" : "class_clean.snb.ipynb",
"user_save_timestamp" : "2018-05-22T15:01:51.417Z",
"auto_save_timestamp" : "1970-01-01T01:00:00.000Z",
"language_info" : {
"name" : "scala",
"file_extension" : "scala",
"codemirror_mode" : "text/x-scala"
},
"trusted" : true,
"sparkNotebook" : null,
"customLocalRepo" : null,
"customRepos" : null,
"customDeps" : null,
"customImports" : null,
"customArgs" : null,
"customSparkConf" : null,
"customVars" : null
},
"cells" : [ {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "84A72D1FD7EB49888C7DF4242EB01AED"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.ml.feature.PCA\n", "import org.apache.spark.ml.linalg.Vectors\n", "import org.apache.spark.SparkContext\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import org.apache.spark.mllib.linalg.Vector\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "\n", "val spark = SparkSession \n", ".builder \n", ".appName (\"WorldBankIndex\") \n", ".getOrCreate ()\n" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.ml.feature.PCA\nimport org.apache.spark.ml.linalg.Vectors\nimport org.apache.spark.SparkContext\nimport org.apache.spark.mllib.linalg.{Matrix, Matrices}\nimport org.apache.spark.mllib.linalg.Vector\nimport org.apache.spark.mllib.linalg.distributed.RowMatrix\nspark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@71304681\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 1,
"time" : "Took: 2.822s, at 2018-05-29 13:24"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : false,
"id" : "BBE39DAF284C4BAD809FCD5DA69084A1"
},
"cell_type" : "code",
"source" : [ "import org.apache.spark.mllib.linalg.Matrix\n", "import org.apache.spark.mllib.linalg.Vectors\n", "import org.apache.spark.mllib.linalg.distributed.RowMatrix\n", "import org.apache.spark.mllib.linalg.{Matrix, Matrices}\n", "import org.apache.spark.ml.feature.PCA\n", "import scala.util.control.Breaks._\n", "\n", "import math._\n", "import Array._\n", "\n", "class GTM\n", "{ \n", " val sigma = 1\n", " val max_iter=10\n", " val method = \"mean\" \n", " \n", " def distance(xs: Array[Double], ys: Array[Double]) = {\n", " sqrt((xs zip ys).map { case (x,y) => pow(y - x, 2) }.sum)\n", " } \n", "\n", " def cdist(xs: Double, ys: Double) = {\n", " sqrt(( pow(ys - xs, 2) ))\n", " } \n", "\n", " def get_lattice_points(n_grid:Int): Array[Array[Double]] ={\n", " val r = scala.util.Random\n", " val mat= Array.range(0,n_grid).map(i => Array.range(0,3).map(j =>r.nextDouble() ))\n", " return mat.transpose \n", " } \n", "\n", "//def init()\n", "\n", "//generate map \n", " val z = get_lattice_points(3)\n", " val rbfs = get_lattice_points(2)\n", " val dd = z.flatMap(x =>rbfs.map(y => {distance(x,y)}))\n", " val phi = dd.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " val r = scala.util.Random\n", "\n", "//init W and beta from PCA\n", " val data = Array(\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())),\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())),\n", " Vectors.dense(Array.range(0,3).map(i => r.nextDouble())) )\n", "\n", " val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF(\"features\")\n", "\n", " val pca = new PCA()\n", " .setInputCol(\"features\")\n", " .setOutputCol(\"pcaFeatures\")\n", " .setK(3)\n", " .fit(df)\n", "//val result = pca.transform(df).select(\"pcaFeatures\")\n", "//result.show(false)\n", "\n", " import breeze.linalg._\n", "\n", " val pcc = pca.pc\n", " val z2 = DenseMatrix(z.map(_.toArray):_*) \n", " val phi2 = DenseMatrix(phi.map(_.toDouble):_*)\n", " val phinv = pinv(phi2)\n", " val w = z2.toArray * phinv.toArray *:* pcc.toArray.patch(9, Nil, 1)\n", "\n", " val betainv1 = pca.explainedVariance(2)\n", " val dot1 =phi * w\n", " val dot2 =phi * w\n", " val inter_dist1 =dot1.map(x=>dot2.map(y=> cdist(x,y)))\n", " val inter_dist = DenseMatrix(inter_dist1.map(_.toArray):_*) \n", "//diagonal\n", " List.range(0,inter_dist.rows).flatMap(i =>List.range(0,inter_dist.cols)\n", " .map(j=> if (j==i) \n", " inter_dist(i,j) = Double.PositiveInfinity))\n", " \n", " val betainv2 = inter_dist.toArray.reduceLeft(_ min _)/2\n", " val beta = 1 / max(betainv1, betainv2) \n", "\n", " //resposability(X : rbfs) \n", " val p1 = exp(-beta/2) \n", " val temp1 = DenseMatrix(rbfs.map(_.toArray):_*) \n", " val temp2 = temp1.toArray\n", " val p2 = dot1.map(x =>temp2.map(y => {cdist(x,y)}))\n", "\n", " val temp3 = DenseMatrix(p2.map(_.toArray):_*) \n", " //val pp2 = temp3.toArray\n", " //val p = pp2.map(x => x * p1)\n", "\n", " //val p3 = DenseMatrix(p:_*) \n", " //val somme = sum(p3 ,Axis._1 )\n", " //val re = p3.map( j => somme.map(i => j / i))\n", " val temp5 = DenseMatrix(z.map(_.toArray):_*) \n", "\n", "\n", " def responsability():breeze.linalg.DenseMatrix[Double]=\n", " {\n", " //val p1 = exp(-beta/2) \n", " //val dot1 =phi * w\n", " //val temp1 = DenseMatrix(rbfs.map(_.toArray):_*) \n", " //val temp2 = temp1.toArray\n", " // val p2 = dot1.map(x =>temp2.map(y => {cdist(x,y)}))\n", " //val temp3 = DenseMatrix(p2.map(_.toArray):_*) \n", " val pp2 = temp3.toArray\n", " val p = pp2.map(x => x * p1)\n", "\n", " val p3 = DenseMatrix(p:_*) \n", " val somme = sum(p3 ,Axis._0 )\n", " \n", " val re = p3.map( j => j /:/ somme)\n", " val re1 = re.toArray \n", " val t3 = DenseMatrix(re1.map(_.toArray):_*) \n", " return t3.reshape(27,2)\n", " \n", " }\n", "\n", "def likelihood () :Double=\n", " {\n", " val R = responsability() \n", " val D = temp1.cols\n", " val k1 = (D /2) * log(beta / 2* Pi) \n", " val k2 = temp3.map(i => i * (-beta /2)) \n", " return sum(R.reshape(6,9) * (k2 +k1))\n", " }\n", " \n", "def fit() \n", " {\n", " val tol=1e-3\n", " val verbose = false\n", " val max_iter=10\n", " val alpha=1e-3\n", "\n", "range(1 ,max_iter).foreach(i =>\n", "{ \n", " val R = responsability() \n", " val G = diag(sum(R , Axis._1 ))\n", "\n", " val A1 = G.reshape(81, 9) * phi2 * phi2.t\n", "\n", " val a1 = DenseMatrix.eye[Double](27)\n", " val ze = alpha /beta\n", " val a2 =a1.map( i => i * ze) \n", " val A2 = A1.reshape(27,27) + a2\n", "\n", " val temp4 =phi2.t.reshape(3,3)\n", " // val temp5 = DenseMatrix(z.map(_.toArray):_*) \n", " val A3 = temp4 * temp5 * R.reshape(3, 18)\n", " val W = A2.reshape(3,243) \\ A3 //Solve (A2, A3)\n", " val Beta = temp1.toArray.length / sum(temp3 * R.reshape(6,9))\n", " \n", " \n", " val likelihood1 = likelihood()\n", " var prev_likelihood_ = Double.NegativeInfinity\n", " val diff = abs(likelihood1 - prev_likelihood_) / temp1.rows\n", " prev_likelihood_ = likelihood1\n", " \n", " if (verbose)\n", " print(i+1, likelihood , diff)\n", " \n", " if (diff < tol)\n", " if (verbose)\n", " print(\"converged\")\n", " break\n", " })\n", " }\n", " \n", "def transform() :DenseMatrix[Double]=\n", " {\n", " assert (method == \"mean\") \n", " val R = responsability()\n", " \n", " if (method == \"mean\") (temp5.t * R.reshape(3, 18)).t \n", " else \n", " temp1(argmax(argmax(responsability(), Axis._0)),::).t.toDenseMatrix \n", "}\n", " \n", " \n", "def inverse_transform(): Array[Double]=\n", " {\n", " val d = z.transpose.flatMap(i => rbfs.map(j=> distance(i, j)))\n", " val PHI = d.map({case (t:Double) => (exp(-t)/2*sigma) })\n", " return w * PHI\n", " }\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "}" ],
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : "import org.apache.spark.mllib.linalg.Matrix\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.mllib.linalg.distributed.RowMatrix\nimport org.apache.spark.mllib.linalg.{Matrix, Matrices}\nimport org.apache.spark.ml.feature.PCA\nimport scala.util.control.Breaks._\nimport math._\nimport Array._\ndefined class GTM\n"
}, {
"metadata" : { },
"data" : {
"text/html" : ""
},
"output_type" : "execute_result",
"execution_count" : 2,
"time" : "Took: 8.452s, at 2018-05-29 13:24"
} ]
}, {
"metadata" : {
"trusted" : true,
"input_collapsed" : false,
"collapsed" : true,
"id" : "9451B261D2494585AE2ACA652A554E1C"
},
"cell_type" : "code",
"source" : [ "" ],
"outputs" : [ ]
} ],
"nbformat" : 4
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment