{"id":1337,"date":"2026-01-12T11:15:10","date_gmt":"2026-01-12T03:15:10","guid":{"rendered":"https:\/\/www.52runoob.com\/?p=1337"},"modified":"2026-01-12T11:15:10","modified_gmt":"2026-01-12T03:15:10","slug":"%e5%a4%a7%e6%95%b0%e6%8d%aespark%ef%bc%88%e4%b8%83%e5%8d%81%e4%b8%89%ef%bc%89%ef%bc%9atransformation%e8%bd%ac%e6%8d%a2%e7%ae%97%e5%ad%90glom%e5%92%8cfoldbykey%e4%bd%bf%e7%94%a8%e6%a1%88%e4%be%8b","status":"publish","type":"post","link":"https:\/\/www.52runoob.com\/index.php\/2026\/01\/12\/%e5%a4%a7%e6%95%b0%e6%8d%aespark%ef%bc%88%e4%b8%83%e5%8d%81%e4%b8%89%ef%bc%89%ef%bc%9atransformation%e8%bd%ac%e6%8d%a2%e7%ae%97%e5%ad%90glom%e5%92%8cfoldbykey%e4%bd%bf%e7%94%a8%e6%a1%88%e4%be%8b\/","title":{"rendered":"\u5927\u6570\u636eSpark\uff08\u4e03\u5341\u4e09\uff09\uff1aTransformation\u8f6c\u6362\u7b97\u5b50glom\u548cfoldByKey\u4f7f\u7528\u6848\u4f8b"},"content":{"rendered":"\n<p>\u4e0b\u9762\u662f\u4e00\u7bc7<strong>\u4e0e\u4f60 Spark \u7cfb\u5217\u98ce\u683c\u7edf\u4e00\u3001\u53ef\u76f4\u63a5\u53d1\u5e03<\/strong>\u7684\u7b2c <strong>\u4e03\u5341\u4e09\u7bc7<\/strong>\u6559\u7a0b\u6587\u7ae0 \ud83d\udc47<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h1 class=\"wp-block-heading\">\u5927\u6570\u636e Spark\uff08\u4e03\u5341\u4e09\uff09\uff1aTransformation \u8f6c\u6362\u7b97\u5b50 glom \u548c foldByKey \u4f7f\u7528\u6848\u4f8b<\/h1>\n\n\n\n<p>\u5728 Spark \u7684 <strong>Transformation \u8f6c\u6362\u7b97\u5b50<\/strong>\u4e2d\uff0c\u6709\u4e9b\u7b97\u5b50\u4e0d\u5e38\u7528\uff0c\u4f46<strong>\u4e00\u65e6\u7528\u5230\u5c31\u975e\u5e38\u5173\u952e<\/strong>\u3002<br>\u672c\u7bc7\u6211\u4eec\u91cd\u70b9\u8bb2\u89e3\u4e24\u4e2a\u975e\u5e38\u5178\u578b\u3001\u4f46\u4f7f\u7528\u573a\u666f\u5b8c\u5168\u4e0d\u540c\u7684\u7b97\u5b50\uff1a<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p><strong>glom<\/strong> \u2014\u2014 
\u9762\u5411\u5206\u533a\u7684\u6570\u636e\u89c2\u5bdf\u7b97\u5b50<br><strong>foldByKey<\/strong> \u2014\u2014 \u5e26\u521d\u59cb\u503c\u7684 Key \u805a\u5408\u7b97\u5b50<\/p>\n<\/blockquote>\n\n\n\n<p>\u7406\u89e3\u5b83\u4eec\uff0c\u6709\u52a9\u4e8e\u4f60\u771f\u6b63\u7406\u89e3 <strong>Spark \u7684\u5206\u533a\u673a\u5236\u548c Key \u805a\u5408\u8fc7\u7a0b<\/strong>\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e00\u3001glom \u7b97\u5b50\u8be6\u89e3<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">1\ufe0f\u20e3 glom \u662f\u4ec0\u4e48\uff1f<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\ndef glom(): RDD&#x5B;Array&#x5B;T]]\n\n<\/pre><\/div>\n\n\n<p><code>glom<\/code> \u7684\u4f5c\u7528\u662f\uff1a<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p><strong>\u5c06\u6bcf\u4e2a\u5206\u533a\u4e2d\u7684\u6240\u6709\u5143\u7d20\uff0c\u8f6c\u6362\u6210\u4e00\u4e2a\u6570\u7ec4<\/strong><\/p>\n<\/blockquote>\n\n\n\n<p>\u4e5f\u5c31\u662f\u8bf4\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u4e00\u4e2a\u5206\u533a \u2192 \u4e00\u4e2a <code>Array<\/code><\/li>\n\n\n\n<li>\u5143\u7d20\u4e0d\u53d8\uff0c\u7ed3\u6784\u6539\u53d8<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">2\ufe0f\u20e3 glom \u7684\u6838\u5fc3\u7279\u70b9<\/h3>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>\u7279\u70b9<\/th><th>\u8bf4\u660e<\/th><\/tr><\/thead><tbody><tr><td>\u4f5c\u7528\u5bf9\u8c61<\/td><td>\u5206\u533a<\/td><\/tr><tr><td>\u662f\u5426\u89e6\u53d1 Shuffle<\/td><td>\u274c \u4e0d\u4f1a<\/td><\/tr><tr><td>\u662f\u5426\u6539\u53d8\u5206\u533a\u6570<\/td><td>\u274c 
\u4e0d\u4f1a<\/td><\/tr><tr><td>\u5e38\u89c1\u7528\u9014<\/td><td>\u67e5\u770b\u5206\u533a\u6570\u636e\u3001\u5206\u533a\u5185\u7edf\u8ba1<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3\ufe0f\u20e3 \u4f7f\u7528\u6848\u4f8b\uff1a\u67e5\u770b\u6bcf\u4e2a\u5206\u533a\u7684\u6570\u636e<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nval conf = new SparkConf().setMaster(&quot;local&#x5B;*]&quot;).setAppName(&quot;GlomDemo&quot;)\nval sc = new SparkContext(conf)\n\nval rdd = sc.parallelize(1 to 10, 3)\n\nval result = rdd.glom()\n\nresult.collect().foreach(arr =&gt; {\n  println(arr.mkString(&quot;,&quot;))\n})\n\nsc.stop()\n\n<\/pre><\/div>\n\n\n<p><strong>\u793a\u4f8b\u8f93\u51fa\uff1a<\/strong><\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\n1,2,3\n4,5,6\n7,8,9,10\n\n<\/pre><\/div>\n\n\n<p>\ud83d\udc49 \u975e\u5e38\u9002\u5408\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u8c03\u8bd5\u5206\u533a\u662f\u5426\u5747\u5300<\/li>\n\n\n\n<li>\u9a8c\u8bc1 repartition \/ coalesce \u6548\u679c<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">4\ufe0f\u20e3 \u4f7f\u7528\u6848\u4f8b\uff1a\u8ba1\u7b97\u6bcf\u4e2a\u5206\u533a\u7684\u6700\u5927\u503c<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nval rdd = sc.parallelize(1 to 10, 3)\n\nval maxPerPartition = rdd.glom().map(arr =&gt; arr.max)\n\nmaxPerPartition.collect().foreach(println)\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\u26a0\ufe0f glom \u4f7f\u7528\u6ce8\u610f\u4e8b\u9879<\/h3>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u4f1a\u628a\u5206\u533a\u6570\u636e\u4e00\u6b21\u6027\u52a0\u8f7d\u5230\u5185\u5b58<\/li>\n\n\n\n<li><strong>\u4e0d\u9002\u5408\u6570\u636e\u91cf\u7279\u522b\u5927\u7684\u5206\u533a<\/strong><\/li>\n\n\n\n<li>\u66f4\u504f\u5411 <strong>\u8c03\u8bd5 \/ \u5206\u6790\u7528\u9014<\/strong><\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e8c\u3001foldByKey \u7b97\u5b50\u8be6\u89e3<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">1\ufe0f\u20e3 foldByKey \u662f\u4ec0\u4e48\uff1f<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\ndef foldByKey(zeroValue: V)(func: (V, V) =&gt; V): RDD&#x5B;(K, V)]\n\n<\/pre><\/div>\n\n\n<p><code>foldByKey<\/code> \u662f\u4e00\u4e2a <strong>Key-Value \u805a\u5408\u7b97\u5b50<\/strong>\uff0c\u53ef\u4ee5\u770b\u4f5c\uff1a<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p><strong>\u5e26\u521d\u59cb\u503c\u7684 reduceByKey<\/strong><\/p>\n<\/blockquote>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">2\ufe0f\u20e3 foldByKey \u4e0e reduceByKey \u7684\u533a\u522b<\/h3>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>\u7b97\u5b50<\/th><th>\u662f\u5426\u6709\u521d\u59cb\u503c<\/th><th>\u805a\u5408\u903b\u8f91<\/th><\/tr><\/thead><tbody><tr><td>reduceByKey<\/td><td>\u274c<\/td><td>\u5206\u533a\u5185 = \u5206\u533a\u95f4<\/td><\/tr><tr><td>foldByKey<\/td><td>\u2705<\/td><td>\u5206\u533a\u5185 = \u5206\u533a\u95f4<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p>\u26a0\ufe0f \u6ce8\u610f\uff1a<br><code>foldByKey<\/code> <strong>\u5206\u533a\u5185\u548c\u5206\u533a\u95f4\u4f7f\u7528\u7684\u662f\u540c\u4e00\u4e2a\u51fd\u6570<\/strong>\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 
class=\"wp-block-heading\">3\ufe0f\u20e3 \u4f7f\u7528\u6848\u4f8b\uff1a\u6309 key \u6c42\u548c\uff08\u521d\u59cb\u503c = 0\uff09<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nval conf = new SparkConf().setMaster(&quot;local&#x5B;*]&quot;).setAppName(&quot;FoldByKeyDemo&quot;)\nval sc = new SparkContext(conf)\n\nval rdd = sc.parallelize(\n  List((&quot;a&quot;, 1), (&quot;a&quot;, 2), (&quot;b&quot;, 3), (&quot;b&quot;, 4)), 2\n)\n\nval result = rdd.foldByKey(0)(_ + _)\n\nresult.collect().foreach(println)\n\nsc.stop()\n\n<\/pre><\/div>\n\n\n<p><strong>\u8f93\u51fa\u7ed3\u679c\uff1a<\/strong><\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\n(a,3)\n(b,7)\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">4\ufe0f\u20e3 \u4f7f\u7528\u6848\u4f8b\uff1a\u6309 key \u6c42\u6700\u5927\u503c\uff08\u521d\u59cb\u503c = Int.MinValue\uff09<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nval rdd = sc.parallelize(\n  List((&quot;a&quot;, 3), (&quot;a&quot;, 1), (&quot;b&quot;, 5), (&quot;b&quot;, 2)), 2\n)\n\nval result = rdd.foldByKey(Int.MinValue)(math.max)\n\nresult.collect().foreach(println)\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\u26a0\ufe0f foldByKey \u521d\u59cb\u503c\u7684\u91cd\u8981\u6027<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><code>zeroValue<\/code> <strong>\u5728\u6bcf\u4e2a\u5206\u533a\u5185\uff0c\u6bcf\u4e2a key \u90fd\u4f1a\u4f7f\u7528\u4e00\u6b21<\/strong><\/li>\n\n\n\n<li>\u9009\u9519\u521d\u59cb\u503c\u4f1a\u5bfc\u81f4\u7ed3\u679c\u9519\u8bef<\/li>\n<\/ul>\n\n\n\n<p>\u274c \u9519\u8bef\u793a\u4f8b\uff08\u6700\u5927\u503c\u7528 0\uff0c\u5f53\u67d0\u4e2a key \u7684\u503c\u5168\u4e3a\u8d1f\u6570\u65f6\u7ed3\u679c\u4f1a\u53d8\u6210 0\uff09\uff1a<\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: 
plain; title: ; notranslate\" title=\"\">\nfoldByKey(0)(math.max)\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e09\u3001glom + foldByKey \u7efc\u5408\u7406\u89e3<\/h2>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>\u7b97\u5b50<\/th><th>\u6838\u5fc3\u5173\u6ce8\u70b9<\/th><\/tr><\/thead><tbody><tr><td>glom<\/td><td>\u5206\u533a\u5185\u7684\u6570\u636e\u7ed3\u6784<\/td><\/tr><tr><td>foldByKey<\/td><td>Key \u7684\u805a\u5408\u89c4\u5219<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p>\ud83d\udc49 \u4e00\u4e2a\u504f\u5411 <strong>\u5206\u533a\u89c6\u89d2<\/strong><br>\ud83d\udc49 \u4e00\u4e2a\u504f\u5411 <strong>Key \u805a\u5408\u89c6\u89d2<\/strong><\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u56db\u3001\u4f7f\u7528\u5efa\u8bae\u4e0e\u6700\u4f73\u5b9e\u8df5<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">\u2705 \u4ec0\u4e48\u65f6\u5019\u7528 glom\uff1f<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u8c03\u8bd5\u5206\u533a\u6570\u636e<\/li>\n\n\n\n<li>\u67e5\u770b\u5206\u533a\u662f\u5426\u503e\u659c<\/li>\n\n\n\n<li>\u5c0f\u6570\u636e\u573a\u666f\u4e0b\u7684\u5206\u533a\u7edf\u8ba1<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\u2705 \u4ec0\u4e48\u65f6\u5019\u7528 foldByKey\uff1f<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u9700\u8981\u521d\u59cb\u503c\u7684 Key \u805a\u5408<\/li>\n\n\n\n<li>\u805a\u5408\u903b\u8f91\u7b80\u5355\u7edf\u4e00<\/li>\n\n\n\n<li>\u4e0d\u60f3\u5199 aggregateByKey \/ combineByKey<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e94\u3001\u603b\u7ed3<\/h2>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>glom<\/strong>\uff1a<br>\ud83d\udc49 \u770b\u6e05 Spark 
\u7684\u201c\u5206\u533a\u957f\u4ec0\u4e48\u6837\u201d<\/li>\n\n\n\n<li><strong>foldByKey<\/strong>\uff1a<br>\ud83d\udc49 \u5e26\u521d\u59cb\u503c\u7684 reduceByKey<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">\u4e00\u53e5\u8bdd\u8bb0\u5fc6<\/h3>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p><strong>glom \u770b\u5206\u533a\uff0cfoldByKey \u805a\u5408 Key\u3002<\/strong><\/p>\n<\/blockquote>\n","protected":false},"excerpt":{"rendered":"<p>\u4e0b\u9762\u662f\u4e00\u7bc7\u4e0e\u4f60 Spark \u7cfb\u5217\u98ce\u683c\u7edf\u4e00\u3001\u53ef\u76f4\u63a5\u53d1\u5e03\u7684\u7b2c \u4e03\u5341\u4e09\u7bc7\u6559\u7a0b\u6587\u7ae0 \ud83d\udc47 &#8230; <a class=\"more-link\" href=\"https:\/\/www.52runoob.com\/index.php\/2026\/01\/12\/%e5%a4%a7%e6%95%b0%e6%8d%aespark%ef%bc%88%e4%b8%83%e5%8d%81%e4%b8%89%ef%bc%89%ef%bc%9atransformation%e8%bd%ac%e6%8d%a2%e7%ae%97%e5%ad%90glom%e5%92%8cfoldbykey%e4%bd%bf%e7%94%a8%e6%a1%88%e4%be%8b\/\">Continue Reading &rarr;<\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[12],"tags":[],"class_list":["post-1337","post","type-post","status-publish","format-standard","hentry","category-12"],"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/posts\/1337","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/comments?post=1337"}],"version-history":[{"count":1,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/posts\/1337\/revisions"}],"predecessor-v
ersion":[{"id":1338,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/posts\/1337\/revisions\/1338"}],"wp:attachment":[{"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/media?parent=1337"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/categories?post=1337"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/tags?post=1337"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}