{"id":1335,"date":"2026-01-12T11:05:55","date_gmt":"2026-01-12T03:05:55","guid":{"rendered":"https:\/\/www.52runoob.com\/?p=1335"},"modified":"2026-01-12T11:05:55","modified_gmt":"2026-01-12T03:05:55","slug":"%e5%a4%a7%e6%95%b0%e6%8d%aespark%ef%bc%88%e4%b8%83%e5%8d%81%e5%9b%9b%ef%bc%89%ef%bc%9atransformation%e8%bd%ac%e6%8d%a2%e7%ae%97%e5%ad%90aggregatebykey%e5%92%8ccombinebykey%e4%bd%bf%e7%94%a8%e6%a1%88","status":"publish","type":"post","link":"https:\/\/www.52runoob.com\/index.php\/2026\/01\/12\/%e5%a4%a7%e6%95%b0%e6%8d%aespark%ef%bc%88%e4%b8%83%e5%8d%81%e5%9b%9b%ef%bc%89%ef%bc%9atransformation%e8%bd%ac%e6%8d%a2%e7%ae%97%e5%ad%90aggregatebykey%e5%92%8ccombinebykey%e4%bd%bf%e7%94%a8%e6%a1%88\/","title":{"rendered":"\u5927\u6570\u636eSpark\uff08\u4e03\u5341\u56db\uff09\uff1aTransformation\u8f6c\u6362\u7b97\u5b50aggregateByKey\u548ccombineByKey\u4f7f\u7528\u6848\u4f8b"},"content":{"rendered":"\n<p>\u4e0b\u9762\u662f\u4e00\u7bc7<strong>\u98ce\u683c\u3001\u7ed3\u6784\u4e0e\u300aSpark\uff08\u4e03\u5341\u4e94\uff09\u300b\u5b8c\u5168\u4e00\u81f4<\/strong>\u7684\u5b8c\u6574\u6559\u7a0b\u6587\u7ae0\uff0c\u53ef\u76f4\u63a5\u4f5c\u4e3a\u4f60 Spark \u7cfb\u5217\u7684\u7b2c <strong>\u4e03\u5341\u56db\u7bc7<\/strong> \u53d1\u5e03 \ud83d\udc47<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h1 class=\"wp-block-heading\">\u5927\u6570\u636e Spark\uff08\u4e03\u5341\u56db\uff09\uff1aTransformation \u8f6c\u6362\u7b97\u5b50 aggregateByKey \u548c combineByKey \u4f7f\u7528\u6848\u4f8b<\/h1>\n\n\n\n<p>\u5728 Spark \u4e2d\uff0c<strong>Key-Value \u7c7b\u578b RDD<\/strong> \u7684\u805a\u5408\u64cd\u4f5c\u662f\u9762\u8bd5\u4e0e\u5b9e\u6218\u4e2d\u7684\u91cd\u70b9\u5185\u5bb9\u3002<br>\u672c\u7bc7\u5c06\u7cfb\u7edf\u8bb2\u89e3\u4e24\u4e2a\u201c\u770b\u8d77\u6765\u590d\u6742\u3001\u4f46\u975e\u5e38\u5f3a\u5927\u201d\u7684 <strong>Transformation \u8f6c\u6362\u7b97\u5b50<\/strong>\uff1a<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow 
wp-block-quote-is-layout-flow\">\n<p><strong>aggregateByKey<\/strong><br><strong>combineByKey<\/strong><\/p>\n<\/blockquote>\n\n\n\n<p>\u7406\u89e3\u5b83\u4eec\uff0c\u4f60\u5c31\u771f\u6b63\u638c\u63e1\u4e86 Spark \u7684\u805a\u5408\u6a21\u578b\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e00\u3001\u4e3a\u4ec0\u4e48\u9700\u8981 aggregateByKey \u548c combineByKey\uff1f<\/h2>\n\n\n\n<p>\u5728 Key-Value \u805a\u5408\u4e2d\uff0c\u6211\u4eec\u5e38\u89c1\u7684\u7b97\u5b50\u6709\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>reduceByKey<\/li>\n\n\n\n<li>foldByKey<\/li>\n\n\n\n<li>aggregateByKey<\/li>\n\n\n\n<li>combineByKey<\/li>\n<\/ul>\n\n\n\n<p>\u5176\u4e2d\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><code>reduceByKey<\/code>\uff1a<strong>\u5206\u533a\u5185 + \u5206\u533a\u95f4\u903b\u8f91\u5fc5\u987b\u4e00\u81f4<\/strong><\/li>\n\n\n\n<li><code>aggregateByKey<\/code>\uff1a<strong>\u5206\u533a\u5185\u548c\u5206\u533a\u95f4\u903b\u8f91\u53ef\u4ee5\u4e0d\u540c<\/strong><\/li>\n\n\n\n<li><code>combineByKey<\/code>\uff1a<strong>\u6700\u5e95\u5c42\u3001\u6700\u901a\u7528\u7684\u805a\u5408\u7b97\u5b50<\/strong><\/li>\n<\/ul>\n\n\n\n<p>\ud83d\udc49 Spark \u4e2d<strong>\u6240\u6709 Key \u805a\u5408\u7b97\u5b50\uff0c\u5e95\u5c42\u90fd\u57fa\u4e8e combineByKey<\/strong>\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e8c\u3001aggregateByKey \u7b97\u5b50\u8be6\u89e3<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">1\ufe0f\u20e3 \u7b97\u5b50\u5b9a\u4e49<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\ndef aggregateByKey&#x5B;U](\n  zeroValue: U\n)(\n  seqOp: (U, V) =&gt; U,\n  combOp: (U, U) =&gt; U\n): RDD&#x5B;(K, U)]\n\n<\/pre><\/div>\n\n\n<h3 class=\"wp-block-heading\">\u53c2\u6570\u8bf4\u660e<\/h3>\n\n\n\n<figure class=\"wp-block-table\"><table 
class=\"has-fixed-layout\"><thead><tr><th>\u53c2\u6570<\/th><th>\u8bf4\u660e<\/th><\/tr><\/thead><tbody><tr><td>zeroValue<\/td><td>\u521d\u59cb\u503c\uff08\u6bcf\u4e2a\u5206\u533a\u5185\u6bcf\u4e2a key \u90fd\u4f1a\u7528\u4e00\u6b21\uff09<\/td><\/tr><tr><td>seqOp<\/td><td>\u5206\u533a\u5185\u805a\u5408\u903b\u8f91<\/td><\/tr><tr><td>combOp<\/td><td>\u5206\u533a\u95f4\u805a\u5408\u903b\u8f91<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">2\ufe0f\u20e3 \u6267\u884c\u6d41\u7a0b\u56fe\u89e3\uff08\u6587\u5b57\u7248\uff09<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u6bcf\u4e2a\u5206\u533a\u5185\u7684\u6bcf\u4e2a key<\/strong>\u4ee5 <code>zeroValue<\/code> \u4f5c\u4e3a\u521d\u59cb\u503c<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>seqOp<\/code> \u5728\u5206\u533a\u5185\u805a\u5408<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>combOp<\/code> \u5728\u5206\u533a\u95f4\u5408\u5e76\u7ed3\u679c<\/li>\n<\/ol>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3\ufe0f\u20e3 \u4f7f\u7528\u6848\u4f8b\u4e00\uff1a\u5206\u533a\u5185\u53d6\u6700\u5927\u503c\uff0c\u5206\u533a\u95f4\u5bf9\u6700\u5927\u503c\u6c42\u548c<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nval conf = new SparkConf().setMaster(&quot;local&#x5B;*]&quot;).setAppName(&quot;AggregateByKeyDemo&quot;)\nval sc = new SparkContext(conf)\n\nval rdd = sc.parallelize(\n  List(\n    (&quot;a&quot;, 1), (&quot;a&quot;, 3), (&quot;a&quot;, 2),\n    (&quot;b&quot;, 2), (&quot;b&quot;, 4)\n  ), 2\n)\n\nval result = rdd.aggregateByKey(0)(\n  (x, y) =&gt; math.max(x, y),   \/\/ \u5206\u533a\u5185\n  (x, y) =&gt; x + y             \/\/ \u5206\u533a\u95f4\n)\n\nresult.collect().foreach(println)\n\nsc.stop()\n\n<\/pre><\/div>\n\n\n<p><strong>\u6267\u884c\u903b\u8f91\u8bf4\u660e\uff1a<\/strong><\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u5206\u533a\u5185\u53d6\u6700\u5927\u503c<\/li>\n\n\n\n<li>\u5206\u533a\u95f4\u5bf9\u6700\u5927\u503c\u6c42\u548c<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">4\ufe0f\u20e3 \u4f7f\u7528\u6848\u4f8b\u4e8c\uff1a\u6c42\u5e73\u5747\u503c\uff08\u5e38\u89c1\u9762\u8bd5\u9898\uff09<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nval rdd = sc.parallelize(\n  List((&quot;a&quot;, 1), (&quot;a&quot;, 2), (&quot;a&quot;, 3), (&quot;b&quot;, 2), (&quot;b&quot;, 4)), 2\n)\n\nval result = rdd.aggregateByKey((0, 0))(\n  (acc, value) =&gt; (acc._1 + value, acc._2 + 1),\n  (acc1, acc2) =&gt; (acc1._1 + acc2._1, acc1._2 + acc2._2)\n)\n\nval avg = result.mapValues {\n  case (sum, count) =&gt; sum.toDouble \/ count\n}\n\navg.collect().foreach(println)\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e09\u3001combineByKey \u7b97\u5b50\u8be6\u89e3<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">1\ufe0f\u20e3 \u7b97\u5b50\u5b9a\u4e49<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\ndef combineByKey&#x5B;C](\n  createCombiner: V =&gt; C,\n  mergeValue: (C, V) =&gt; C,\n  mergeCombiners: (C, C) =&gt; C\n): RDD&#x5B;(K, C)]\n\n<\/pre><\/div>\n\n\n<h3 class=\"wp-block-heading\">\u53c2\u6570\u8bf4\u660e<\/h3>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>\u53c2\u6570<\/th><th>\u8bf4\u660e<\/th><\/tr><\/thead><tbody><tr><td>createCombiner<\/td><td>\u7b2c\u4e00\u4e2a value \u53d8\u6210\u521d\u59cb\u7ed3\u6784<\/td><\/tr><tr><td>mergeValue<\/td><td>\u5206\u533a\u5185\u5408\u5e76<\/td><\/tr><tr><td>mergeCombiners<\/td><td>\u5206\u533a\u95f4\u5408\u5e76<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<hr class=\"wp-block-separator 
has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">2\ufe0f\u20e3 combineByKey \u6267\u884c\u6838\u5fc3\u601d\u60f3<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u6bcf\u4e2a\u5206\u533a\u5185\uff0c\u6bcf\u4e2a key \u7684\u7b2c\u4e00\u4e2a value \u89e6\u53d1 createCombiner<\/strong><\/li>\n\n\n\n<li>\u540e\u7eed value \u4f7f\u7528 mergeValue<\/li>\n\n\n\n<li>\u4e0d\u540c\u5206\u533a\u7ed3\u679c\u7528 mergeCombiners \u5408\u5e76<\/li>\n<\/ul>\n\n\n\n<p>\ud83d\udc49 \u6ca1\u6709 <code>zeroValue<\/code>\uff0c\u907f\u514d\u65e0\u610f\u4e49\u521d\u59cb\u503c\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3\ufe0f\u20e3 \u4f7f\u7528\u6848\u4f8b\uff1a\u6c42\u6bcf\u4e2a key \u7684\u5e73\u5747\u503c<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nval rdd = sc.parallelize(\n  List((&quot;a&quot;, 1), (&quot;a&quot;, 2), (&quot;a&quot;, 3), (&quot;b&quot;, 2), (&quot;b&quot;, 4)), 2\n)\n\nval result = rdd.combineByKey(\n  v =&gt; (v, 1),\n  (acc: (Int, Int), v) =&gt; (acc._1 + v, acc._2 + 1),\n  (acc1: (Int, Int), acc2: (Int, Int)) =&gt;\n    (acc1._1 + acc2._1, acc1._2 + acc2._2)\n)\n\nval avg = result.mapValues {\n  case (sum, count) =&gt; sum.toDouble \/ count\n}\n\navg.collect().foreach(println)\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u56db\u3001aggregateByKey vs combineByKey \u5bf9\u6bd4<\/h2>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>\u7ef4\u5ea6<\/th><th>aggregateByKey<\/th><th>combineByKey<\/th><\/tr><\/thead><tbody><tr><td>\u521d\u59cb\u503c<\/td><td>\u6709 
zeroValue<\/td><td>\u65e0<\/td><\/tr><tr><td>\u7075\u6d3b\u6027<\/td><td>\u9ad8<\/td><td><strong>\u6700\u9ad8<\/strong><\/td><\/tr><tr><td>\u4f7f\u7528\u96be\u5ea6<\/td><td>\u4e2d<\/td><td>\u9ad8<\/td><\/tr><tr><td>\u5e95\u5c42\u5b9e\u73b0<\/td><td>\u57fa\u4e8e combineByKey<\/td><td>\u539f\u751f<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e94\u3001\u4f7f\u7528\u5efa\u8bae\u4e0e\u6700\u4f73\u5b9e\u8df5<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">\u2705 \u4ec0\u4e48\u65f6\u5019\u7528 aggregateByKey\uff1f<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u9700\u8981\u4e0d\u540c\u7684\u5206\u533a\u5185 \/ \u5206\u533a\u95f4\u903b\u8f91<\/li>\n\n\n\n<li>\u9700\u8981\u7edf\u4e00\u521d\u59cb\u503c<\/li>\n\n\n\n<li>\u5199\u6cd5\u76f8\u5bf9\u6e05\u6670<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\u2705 \u4ec0\u4e48\u65f6\u5019\u7528 combineByKey\uff1f<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u805a\u5408\u903b\u8f91\u590d\u6742<\/li>\n\n\n\n<li>\u521d\u59cb\u503c\u4e0d\u80fd\u968f\u610f\u7ed9<\/li>\n\n\n\n<li>\u9700\u8981\u5b8c\u5168\u638c\u63a7\u805a\u5408\u8fc7\u7a0b<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u516d\u3001\u603b\u7ed3<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p><strong>reduceByKey \u662f\u7b80\u5316\u7248\uff0caggregateByKey \u662f\u8fdb\u9636\u7248\uff0ccombineByKey \u662f\u7ec8\u6781\u7248\u3002<\/strong><\/p>\n<\/blockquote>\n\n\n\n<p>\u771f\u6b63\u7406\u89e3\u8fd9\u4e09\u4e2a\u7b97\u5b50\uff0c\u4f60\u5c31\u771f\u6b63\u7406\u89e3\u4e86 Spark \u7684 <strong>Shuffle + 
\u805a\u5408\u673a\u5236<\/strong>\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4e0b\u9762\u662f\u4e00\u7bc7\u98ce\u683c\u3001\u7ed3\u6784\u4e0e\u300aSpark\uff08\u4e03\u5341\u4e94\uff09\u300b\u5b8c\u5168\u4e00\u81f4\u7684\u5b8c\u6574\u6559\u7a0b\u6587\u7ae0\uff0c\u53ef\u76f4\u63a5\u4f5c\u4e3a&#8230; <a class=\"more-link\" href=\"https:\/\/www.52runoob.com\/index.php\/2026\/01\/12\/%e5%a4%a7%e6%95%b0%e6%8d%aespark%ef%bc%88%e4%b8%83%e5%8d%81%e5%9b%9b%ef%bc%89%ef%bc%9atransformation%e8%bd%ac%e6%8d%a2%e7%ae%97%e5%ad%90aggregatebykey%e5%92%8ccombinebykey%e4%bd%bf%e7%94%a8%e6%a1%88\/\">Continue Reading &rarr;<\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[12],"tags":[],"class_list":["post-1335","post","type-post","status-publish","format-standard","hentry","category-12"],"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/posts\/1335","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/comments?post=1335"}],"version-history":[{"count":1,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/posts\/1335\/revisions"}],"predecessor-version":[{"id":1336,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/posts\/1335\/revisions\/1336"}],"wp:attachment":[{"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/media?parent=1335"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/categories?post=1335"},{"taxonomy":"post_tag"
,"embeddable":true,"href":"https:\/\/www.52runoob.com\/index.php\/wp-json\/wp\/v2\/tags?post=1335"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}