「THUWC 2017」大葱的神力 - 题解-编程知识

忠告：如果你想抄题解的，可以离开，这不是一时半会儿能解决的问题

前置知识：

- [学习笔记：费用流](https://blog.csdn.net/weixin_44043668/article/details/108738212)
- [C++动态规划详解](https://blog.csdn.net/weixin_51951103/article/details/120241450)
- [C++ 背包问题](https://blog.csdn.net/weixin_43899008/article/details/124738946)
- [模拟退火算法（c++实现）（VIP文章）](https://blog.csdn.net/qq547276542/article/details/77800776)
- [TSP--模拟退火算法（c++实现+详细解释）](https://blog.csdn.net/qq_40738840/article/details/84324494)

测试点1：

观察数据点 $n = m = 5$ 。

$1$ 号， $2$ 号， $3$ 号， $5$ 号物品都可以达到自己的最优选择。而枚举 $4$ 号的物品的摆放会发现只有 $4$ 号物品放入第 1 个背包时才能达到最优解。
最后玩出来的结果是：

测试点2、8、9、10：

测试点2：

观察数据 $n=25,m=8$ 。
理论上可以用搜索 + 剪枝，但是因为我实在太懒了，所以直接写了一个模拟退火。
跑了大概 $3$ 分钟跑出来了最优解 $108$ 。

测试点8、9、10：

观察数据，无规律。

直接写随机化算法吧，模拟退火就是个不错的选择，代码细节我们等会儿来说。

据说这个退火算法坚持跑 $1\sim 2$ 年可以跑出结果。。。

代码：

#include <cmath>
#include <ctime>
#include <queue>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
using namespace std;

const int MAXN = 1000;      // maximum number of items
const int MAXM = 300;       // maximum number of drawers
const int INF = (1 << 30);  // large sentinel value; not referenced by this program

// V1[i]     : space taken by item i (subtracted from a drawer's remaining room).
// V2[j]     : capacity of drawer j.
// val[i][j] : value gained when item i is placed in drawer j.
int V1[MAXN + 5], V2[MAXN + 5], val[MAXN + 5][MAXM + 5];

// ans : drawer of every item in the currently accepted state (0 = not placed).
// tmp : drawer of every item as computed by the most recent get_ans() call.
// nw  : current item permutation that get_ans() packs greedily.
// res : value of the currently accepted state.  N / M : item / drawer counts.
int ans[MAXN + 5], tmp[MAXN + 5], nw[MAXN + 5], res = 0, N, M;

// Packs the items in the order nw[1..N] into drawers 1..M: an item goes into
// the current drawer if it still fits, otherwise the packer moves on to the
// next drawer and never returns to an earlier one.  Items left over once the
// drawers are exhausted get drawer 0.  Fills tmp[] and returns the total value.
int get_ans() {
    int ret = 0, j = 1, re = V2[1];
    for (int i = 1; i <= N; i++) {
        const int item = nw[i];
        while (j <= M && re < V1[item]) {
            j++;
            re = V2[j];
        }
        if (j > M) {
            tmp[item] = 0;
            continue;
        }
        tmp[item] = j;
        re -= V1[item];
        ret += val[item][j];
    }
    return ret;
}

// Best assignment found over all annealing runs, and its value.
int real_ans[MAXN + 5], real_res = 0;

// Writes assignment[1..count] to `path`, one drawer index per line.
// Returns false if the file could not be opened or closed cleanly.
static bool save_assignment(const char *path, const int *assignment, int count) {
    FILE *f = fopen(path, "w");
    if (f == NULL) return false;
    for (int i = 1; i <= count; i++) fprintf(f, "%d\n", assignment[i]);
    return fclose(f) == 0;  // the original never closed the file
}

// Reads drawer8.in and runs simulated annealing forever; every time a run
// beats the best known value the assignment is rewritten to drawer8.out and
// the value is printed.  The process is meant to be interrupted by hand.
int main() {
    srand(static_cast<unsigned>(time(NULL)));
    if (freopen("drawer8.in", "r", stdin) == NULL) {
        perror("drawer8.in");
        return 1;
    }
    // Reject sizes that would overflow the static arrays (N == 0 would also
    // make `rand() % N` divide by zero).
    if (scanf("%d%d", &N, &M) != 2 || N < 1 || N > MAXN || M < 1 || M > MAXM) {
        fprintf(stderr, "drawer8.in: bad item/drawer counts\n");
        return 1;
    }
    bool ok = true;
    for (int i = 1; i <= N; i++) ok = (scanf("%d", &V1[i]) == 1) && ok;
    for (int i = 1; i <= M; i++) ok = (scanf("%d", &V2[i]) == 1) && ok;
    for (int i = 1; i <= N; i++)
        for (int j = 1; j <= M; j++) ok = (scanf("%d", &val[i][j]) == 1) && ok;
    if (!ok) {
        fprintf(stderr, "drawer8.in: truncated or malformed input\n");
        return 1;
    }

    while (true) {
        // Start every run from a random permutation.  Fisher-Yates with
        // rand() replaces std::random_shuffle, which was removed in C++17.
        for (int i = 1; i <= N; i++) nw[i] = i;
        for (int i = N; i > 1; i--) swap(nw[i], nw[rand() % i + 1]);
        res = get_ans();
        for (int j = 1; j <= N; j++) ans[j] = tmp[j];  // keep ans in sync with res

        for (double T = 1E12; T >= 1E-5; T *= 0.986) {
            const int x = rand() % N + 1;
            const int y = rand() % N + 1;
            swap(nw[x], nw[y]);  // neighbour state: swap two positions
            const int cand = get_ans();

            // Same acceptance rule as before (p >= 1 / rand()), written as a
            // product so that rand() == 0 cannot divide by zero.
            bool accept = cand > res;
            if (!accept) {
                const int r = rand();
                const double p = 1.0 / (1 + exp((res - cand) / T));
                accept = r > 0 && p * r >= 1.0;
            }
            if (accept) {
                for (int j = 1; j <= N; j++) ans[j] = tmp[j];
                res = cand;
            } else {
                swap(nw[x], nw[y]);  // undo the rejected swap
            }
        }

        if (res > real_res) {
            real_res = res;
            for (int i = 1; i <= N; i++) real_ans[i] = ans[i];
            if (!save_assignment("drawer8.out", real_ans, N)) perror("drawer8.out");
            printf("%d\n", real_res);
        }
    }
}

测试点3：

观察数据 $m=1$
很经典的 $01$ 背包，直接写 $dp$ 就可以过了。

代码：

#include <cstdio>
#include <algorithm>
using namespace std;

const int MAXN = 2000;   // maximum number of items
const int MAXV = 10000;  // maximum knapsack capacity

// dp[i][j]: best total value using only items 1..i with total volume <= j.
int dp[MAXN + 5][MAXV + 5];
// V[i]: volume of item i.
int V[MAXN + 5];
// pre[i][j]: true when the optimum stored in dp[i][j] takes item i.
bool pre[MAXN + 5][MAXV + 5];
// tag[i]: true when item i belongs to the reconstructed optimal solution.
bool tag[MAXN + 5];

// Walks the pre[][] table back from state (i, j) and marks every chosen item
// in tag[].  Iterative form of the original recursion.
void get_ans(int i, int j) {
    for (; i > 0; i--) {
        if (pre[i][j]) {
            tag[i] = true;
            j -= V[i];  // item i occupied V[i] of the capacity
        }
    }
}

// Input: "N M", N volumes, the capacity K of the single drawer, then one value
// per item.  Output: one line per item, "1" if it is put in the drawer, else "0".
int main() {
    int N, M, K;
    // M is the drawer count of the general problem; this solver handles the
    // single-drawer case, so it is read and then ignored.
    if (scanf("%d%d", &N, &M) != 2 || N < 0 || N > MAXN) {
        fprintf(stderr, "bad item count\n");
        return 1;
    }
    (void)M;
    for (int i = 1; i <= N; i++) {
        if (scanf("%d", &V[i]) != 1 || V[i] < 0) {
            fprintf(stderr, "bad volume for item %d\n", i);
            return 1;
        }
    }
    if (scanf("%d", &K) != 1 || K < 0 || K > MAXV) {
        fprintf(stderr, "bad capacity\n");
        return 1;
    }
    for (int i = 1; i <= N; i++) {
        int k;  // value of item i
        if (scanf("%d", &k) != 1) {
            fprintf(stderr, "bad value for item %d\n", i);
            return 1;
        }
        for (int j = 0; j <= K; j++) {
            // Default: skip item i.  The original only filled j >= V[i], so
            // smaller capacities silently dropped the value built so far.
            dp[i][j] = dp[i - 1][j];
            pre[i][j] = false;
            if (j >= V[i] && dp[i - 1][j - V[i]] + k > dp[i][j]) {
                dp[i][j] = dp[i - 1][j - V[i]] + k;
                pre[i][j] = true;
            }
        }
    }
    get_ans(N, K);
    for (int i = 1; i <= N; i++) puts(tag[i] ? "1" : "0");
    // printf("%d\n", dp[N][K]);  // uncomment to print the optimal value
}

测试点4、5、6：

测试点4：

观察数据，所有物品的体积都是 $233$ 。

这个时候就可以确定每个背包能装的物品数量是多少，背包容量和物体体积转换为了物品数量限制。

这个时候就是一个最大权匹配，写个费用流就可以了。

测试点5：

观察数据，所有物品的体积都是 $2233$ 。

跟测试点4 一样的解法，只是时间跑得久一些，大概 $5$ 秒。

测试点6：

观察数据，所有物品的体积都是 ~~$19260817$~~ 在 $19660600 \sim 19660720$ 这个范围以内。

体积不一样，好像并不能再使用费用流了？

但实际上物品之间的体积都是微小扰动造成的，也就是说物品之间的体积差相对于物品体积本身而言非常微小，以至于可以忽略不计。

所以我们直接把物品体积当作所有物品体积的最大值来算就可以了。

代码：

#include <queue>
#include <cstdio>
#include <algorithm>
using namespace std;

// Size limits of the bipartite item/drawer network built by main().
const int MAXN = 1000;         // maximum number of items
const int MAXM = 700;          // maximum number of drawers
const int MAXV = MAXN + MAXM;  // items + drawers (source and sink fit in the +5 slack)
const int INF = (1 << 30);

// One directed residual edge.  Edges are created in pairs (forward + reverse)
// and chained per node through `nxt`; adj[v] is the head of node v's list and
// cur[v] is the current-arc pointer used while augmenting.
struct edge {
    int to, flow, cap, dis;  // head node, current flow, capacity, cost per unit
    edge *nxt, *rev;         // next edge of the same tail node, paired reverse edge
} edges[4 * MAXN * MAXM + 5], *adj[MAXV + 5], *cur[MAXV + 5], *ecnt;

// Min-cost max-flow on the global edge pool.  Nodes must be numbered S..T.
struct flow_graph {
    int S, T, cost, dist[MAXV + 5];     // source, sink, accumulated cost, shortest-path costs
    bool vis[MAXV + 5], inq[MAXV + 5];  // on the DFS stack / currently in the SPFA queue
    deque<int> que;                     // SPFA work queue

    // Empties the graph.  Every adjacency head and the accumulated cost are
    // cleared, so the object can be reused and init() may be called before or
    // after S and T are assigned.
    void init() {
        ecnt = &edges[0];
        for (int i = 0; i < MAXV + 5; i++) adj[i] = NULL;
        cost = 0;
    }

    // Adds edge u -> v with capacity c and unit cost w, plus its zero-capacity
    // reverse edge with cost -w.
    void addedge(int u, int v, int c, int w) {
        edge *p = (++ecnt), *q = (++ecnt);
        p->to = v, p->cap = c, p->dis = w, p->flow = 0;
        p->nxt = adj[u], adj[u] = p;
        q->to = u, q->cap = 0, q->dis = -w, q->flow = 0;
        q->nxt = adj[v], adj[v] = q;
        p->rev = q, q->rev = p;
    }

    // SPFA over the residual graph: fills dist[] with the cheapest cost from S
    // and resets the current-arc pointers.  Returns whether T is reachable.
    bool relabel() {
        for (int i = S; i <= T; i++) dist[i] = INF, cur[i] = adj[i];
        que.push_back(S);
        dist[S] = 0;
        inq[S] = true;
        while (!que.empty()) {
            int f = que.front();
            que.pop_front();
            inq[f] = false;
            for (edge *p = adj[f]; p != NULL; p = p->nxt) {
                if (p->cap <= p->flow) continue;                 // saturated
                if (dist[p->to] <= dist[f] + p->dis) continue;   // no improvement
                dist[p->to] = dist[f] + p->dis;
                if (inq[p->to]) continue;
                inq[p->to] = true;
                // Nodes cheaper than the current front are served first.
                if (!que.empty() && dist[p->to] < dist[que.front()])
                    que.push_front(p->to);
                else
                    que.push_back(p->to);
            }
        }
        return !(dist[T] == INF);
    }

    // Pushes up to `tot` units from x towards T, using only edges that lie on
    // a shortest path according to dist[].  Returns the amount pushed.
    int aug(int x, int tot) {
        if (x == T) {
            cost += tot * dist[T];
            return tot;
        }
        int sum = 0;
        vis[x] = true;
        for (edge *&p = cur[x]; p != NULL; p = p->nxt) {
            if (p->cap > p->flow && !vis[p->to] && dist[x] + p->dis == dist[p->to]) {
                int del = aug(p->to, min(tot - sum, p->cap - p->flow));
                sum += del, p->flow += del, p->rev->flow -= del;
                if (sum == tot) break;  // keep cur[x] on this edge; it may have room left
            }
        }
        vis[x] = false;
        return sum;
    }

    // Augments until T becomes unreachable.  Returns the total flow; the
    // matching total cost is left in `cost`.
    int min_cost_max_flow() {
        int flow = 0;
        while (relabel()) flow += aug(S, INF);
        return flow;
    }
} G;

// V1[i]: volume of item i; V2[j]: capacity of drawer j (filled by main()).
int V1[MAXN + 5], V2[MAXN + 5];
int main() {int N, M, mx = 0;scanf("%d%d", &N, &M); // N节点数目，M边数目G.init();G.S = 0, G.T = N + M + 1; // 初始化图，设置源点和汇点for (int i = 1; i <= N; i++) {scanf("%d", &V1[i]); // 输入V1数组G.addedge(G.S, i, 1, 0); // 源点连接V1中的每个节点，边权为1，费用为0mx = max(mx, V1[i]);}for (int i = 1; i <= M; i++) {scanf("%d", &V2[i]); // 输入V2数组G.addedge(N + i, G.T, V2[i] / mx, 0); // V2中的每个节点连接汇点，边权为V2[i]/mx，费用为0}for (int i = 1; i <= N; i++)for (int j = 1; j <= M; j++) {int k;scanf("%d", &k); // 输入费用矩阵G.addedge(i, N + j, 1, -k); // 连接V1和V2，边权为1，费用为-k}G.min_cost_max_flow(); // 执行最小费用最大流算法for (int i = 1; i <= N; i++) {int ans = N;for (edge *p = adj[i]; p != NULL; p = p->nxt)if ( p->to != G.S && p->flow == 1 ) ans = p->to; // 配对结果printf("%d\n", ans - N); // 输出配对结果}
//  printf("%d\n", -G.cost); // 如果需要输出总费用，取消注释这一行
}

测试点7：

观察数据，除了物品 $1$ 的体积为 $46972$ 以外，其他物品的体积都为 $11743$ 。

我们直接枚举物品 $1$ 的摆放位置，再跑费用流，取最优值。

时间复杂度有点高，跑了 $1$ 分钟左右才跑出来最优解。

代码：（由于👆面有注释，所以这段不注释了~）

#include<queue>
#include<cstdio>
#include<algorithm>
using namespace std;

// Size limits of the item/drawer network.
const int MAXN = 500;
const int MAXM = 100;
const int MAXV = MAXN + MAXM;
const int INF = (1<<30);

// Residual edge; edges come in forward/reverse pairs and are chained per
// tail node through `nxt`.  adj[v] heads node v's list, cur[v] is its
// current-arc pointer, ecnt points at the last edge handed out.
struct edge {
    int to, flow, cap, dis;
    edge *nxt, *rev;
} edges[4*MAXN*MAXM + 5], *adj[MAXV + 5], *cur[MAXV + 5], *ecnt;

// Min-cost max-flow solver working on the global edge pool.  Nodes are
// numbered S..T, so S and T have to be set before init() is called.
struct flow_graph {
    int S, T, cost, dist[MAXV + 5];
    bool vis[MAXV + 5], inq[MAXV + 5];
    deque<int> que;

    // Drops all edges of nodes S..T and zeroes the accumulated cost.
    void init() {
        ecnt = &edges[0];
        for (int node = S; node <= T; ++node) {
            adj[node] = NULL;
        }
        cost = 0;
    }

    // Inserts u -> v (capacity c, unit cost w) and its reverse v -> u
    // (capacity 0, unit cost -w).
    void addedge(int u, int v, int c, int w) {
        edge *fwd = ++ecnt;
        edge *bwd = ++ecnt;

        fwd->to = v;
        fwd->cap = c;
        fwd->dis = w;
        fwd->flow = 0;
        fwd->nxt = adj[u];
        adj[u] = fwd;

        bwd->to = u;
        bwd->cap = 0;
        bwd->dis = -w;
        bwd->flow = 0;
        bwd->nxt = adj[v];
        adj[v] = bwd;

        fwd->rev = bwd;
        bwd->rev = fwd;
    }

    // Queue-based shortest path from S over edges with spare capacity.
    // Resets cur[] and reports whether T can still be reached.
    bool relabel() {
        for (int node = S; node <= T; ++node) {
            dist[node] = INF;
            cur[node] = adj[node];
        }
        que.push_back(S);
        dist[S] = 0;
        inq[S] = true;

        while (!que.empty()) {
            const int u = que.front();
            que.pop_front();
            inq[u] = false;

            for (edge *e = adj[u]; e != NULL; e = e->nxt) {
                if (!(e->cap > e->flow)) continue;
                if (!(dist[e->to] > dist[u] + e->dis)) continue;

                dist[e->to] = dist[u] + e->dis;
                if (inq[e->to]) continue;

                inq[e->to] = true;
                // A node cheaper than the queue's front jumps the queue.
                const bool to_front = !que.empty() && dist[e->to] < dist[que.front()];
                if (to_front) {
                    que.push_front(e->to);
                } else {
                    que.push_back(e->to);
                }
            }
        }
        return dist[T] != INF;
    }

    // Depth-first push of at most `tot` units from x to T along edges that
    // are tight with respect to dist[].  Returns the units actually pushed.
    int aug(int x, int tot) {
        if (x == T) {
            cost += tot*dist[T];
            return tot;
        }

        int pushed = 0;
        vis[x] = true;
        for (edge *&e = cur[x]; e != NULL; e = e->nxt) {
            const bool usable = e->cap > e->flow
                             && !vis[e->to]
                             && dist[x] + e->dis == dist[e->to];
            if (!usable) continue;

            const int got = aug(e->to, min(tot - pushed, e->cap - e->flow));
            pushed += got;
            e->flow += got;
            e->rev->flow -= got;
            if (pushed == tot) break;
        }
        vis[x] = false;
        return pushed;
    }

    // Repeats relabel + augment until the sink is cut off; returns the flow.
    int min_cost_max_flow() {
        int total = 0;
        while (relabel()) {
            total += aug(S, INF);
        }
        return total;
    }
} G;
int V1[MAXN + 5], V2[MAXN + 5], val[MAXN + 5][MAXM + 5];
int ans[MAXN + 5], res = 0;
int main() {int N, M; scanf("%d%d", &N, &M);G.S = 0, G.T = N + M + 1; G.init();for(int i=1;i<=N;i++)scanf("%d", &V1[i]);for(int i=1;i<=M;i++)scanf("%d", &V2[i]);for(int i=1;i<=N;i++)for(int j=1;j<=M;j++)scanf("%d", &val[i][j]);for(int i=2;i<=N;i++)for(int j=1;j<=M;j++)G.addedge(i, N + j, 1, -val[i][j]);for(int i=2;i<=N;i++)G.addedge(G.S, i, 1, 0);for(int i=1;i<=M;i++)G.addedge(N + i, G.T, V2[i]/V1[2], 0);G.min_cost_max_flow(); res = -G.cost;for(int i=2;i<=N;i++) {int flag = N;for(edge *p=adj[i];p!=NULL;p=p->nxt)if( p->to != G.S && p->flow == 1 ) flag = p->to;ans[i] = flag - N;}for(int i=1;i<=M;i++) {if( V2[i] < V1[1] ) continue;G.init();for(int j=2;j<=N;j++)for(int k=1;k<=M;k++)G.addedge(j, N + k, 1, -val[j][k]);for(int j=2;j<=N;j++)G.addedge(G.S, j, 1, 0);for(int j=1;j<=M;j++)if( j == i ) G.addedge(N + j, G.T, (V2[j] - V1[1])/V1[2], 0);else G.addedge(N + j, G.T, V2[j]/V1[2], 0);G.min_cost_max_flow();if( res < -G.cost + val[1][i] ) {res = -G.cost + val[1][i]; ans[1] = i;for(int i=2;i<=N;i++) {int flag = N;for(edge *p=adj[i];p!=NULL;p=p->nxt)if( p->to != G.S && p->flow == 1 ) flag = p->to;ans[i] = flag - N;}}}for(int i=1;i<=N;i++)printf("%d\n", ans[i]);
//  printf("%d\n", res);
}

至此，题解部分已经结束，下面是闲谈（可能对你有很大帮助）。

如果你认为这篇题解写的不错的话……动动手指，一键三连吧！👇👇👇

以下所有内容仅供参考，烧显卡本人不负责！

前置神力：

一台 Windows10 或 Windows Server 2019 服务器
CPU：至少 Intel E5-2680V4（16 核心 32 线程）
内存：至少 64 GiB
显卡：至少 Nvidia Tesla P40 和一块 Nvidia Tesla M40（显存一共至少 24 GiB）
以上条件为必要，可适当低配（如果没有，也可以继续看下去，有解决办法）。。
网络：100Mbps 及以上下载带宽，可能需要~~科学上网~~（自行百度，不在文章范围。。）

插话：解决物理神力的问题

你得去一些云服务商租借 GPU 云服务器，比如 $3500$ 元一个月的阿里云，或者一个月 待更新 元、对学生党友好的雨云。

~~但……如果你没有零花钱（或者不知道云服务器怎么用），可以不用往下面看了……~~

正题：

众所周知，这题可以用模拟退火来做

这时间复杂度…… $O(n!)$ ~~我都快疯了！！！~~，开了几个晚上啊！

我们可以考虑将程序挂载到 `GPU` 上运行（关键！！！）

插话：`GPU` 是什么东西？

GPU 就是 图形处理器 的缩写，它可以提供更高的并行运算能力，在 3D设计 游戏开发 图形处理中均有广泛的应用。

当然，GPU 也有他的缺点：价格过高、发热过猛、容易烧坏……

这是一张 GPU 价格一览表：

型号	显存	价格
Nvidia RTX 4090	24 GiB	5050￥
Nvidia Tesla P40	24 GiB	1080￥
Nvidia Tesla K80	24 GiB	899￥
Nvidia Tesla M40	24 GiB	555￥

如何挂载到 `GPU` 上？

首先，我们来看一个 C++ 程序：

#include <bits/stdc++.h>
#include <cuda_runtime.h>
#include <cuda.h>
using namespace std;

// Does no work besides switching off iostream/stdio synchronisation; the
// program exists so the includes above it get compiled.
int main() {
    std::ios_base::sync_with_stdio(false);
    return 0;
}

其中的 `#include <cuda_runtime.h>` 就是调用 Nvidia 显卡的库文件，然后就可以挂载显卡了！！

别高兴，我们先得安装驱动~

进入这里，下载 CUDA 和显卡驱动程序，一定要选择对应的版本！

==注意：一定要选择离线版本，否则就只能魔法安装了！==

安装完成之后，就可以安装 Microsoft Visual Studio 2022 了，建议安装 2022 版本，安装时注意选择跟 C++ 有关的所有模块。

安装完成后，我们测试一个程序，看看能不能正常运行：

#include <bits/stdc++.h>
using namespace std;

int n;  // how many integers follow on the input

// Toolchain smoke test: reads n, prints n squared, then reads n integers into
// a vector and prints how many were stored.
int main() {
    ios::sync_with_stdio(false);
    cin >> n;
    // Widen before multiplying: n * n overflows int once n exceeds 46340.
    cout << static_cast<long long>(n) * n << '\n';
    vector<int> a;
    for (int i = 1; i <= n; i++) {
        int tmp = 0;
        cin >> tmp;
        a.push_back(tmp);
    }
    cout << a.size() << '\n';
    return 0;
}

测试 `GPU` 性能&改编程序

如果你的 GPU 不是前面所讲的，那么你可以将 Stable Diffusion 本地化部署：（具体在这里看教程）

如果你的电脑出一张 1024x1024 的图片在 10s 之内的话，就可以继续了~

改编程序：

#include<cmath>
#include<ctime>
#include<queue>
#include<cstdio>
#include<cstdlib>
#include<algorithm>
#include<cuda.h>
#include<cuda_runtime.h>
using namespace std;

const int MAXN = 1000;    // size of the per-item arrays
const int MAXM = 300;     // size of the drawer dimension of val
const int INF = (1<<30);

// V1[i]: space item i needs; V2[j]: capacity of drawer j;
// val[i][j]: value of item i when stored in drawer j.
int V1[MAXN + 5], V2[MAXN + 5], val[MAXN + 5][MAXM + 5];
// nw: item order to pack; tmp: drawers assigned by the last get_ans() call;
// ans / res: assignment and value kept by the caller; N / M: item / drawer counts.
int ans[MAXN + 5], tmp[MAXN + 5], nw[MAXN + 5], res = 0, N, M;

// Greedy packer: walks the items in the order nw[1..N] and fills drawers
// 1..M one after another, advancing to the next drawer as soon as an item
// does not fit and never going back.  Items left once all drawers are used
// up get drawer 0.  Stores the result in tmp[] and returns the total value.
int get_ans() {
    int total = 0;
    int drawer = 1;
    int room = V2[1];
    for (int pos = 1; pos <= N; ++pos) {
        const int item = nw[pos];
        // Skip ahead to the first drawer that can still take this item.
        while (drawer <= M && room < V1[item]) {
            ++drawer;
            room = V2[drawer];
        }
        if (drawer > M) {
            tmp[item] = 0;
            continue;
        }
        tmp[item] = drawer;
        room -= V1[item];
        total += val[item][drawer];
    }
    return total;
}
int real_ans[MAXN + 5], real_res = 0;
int main() {srand(time(NULL));freopen("drawer8.in", "r", stdin);scanf("%d%d", &N, &M);for(int i=1; i<=N; i++)scanf("%d", &V1[i]);for(int i=1; i<=M; i++)scanf("%d", &V2[i]);for(int i=1; i<=N; i++)for(int j=1; j<=M; j++)scanf("%d", &val[i][j]);while( true ) {for(int i=1; i<=N; i++)nw[i] = i;random_shuffle(nw + 1, nw + N + 1);res = get_ans();for(register double T=1E12; T>=1E-5; T*=0.986) {int x = rand() * 17 % N + 1, y = rand() * 17 % N + 1;swap(nw[x], nw[y]);if( get_ans() > res || 1.0/(1 + exp((res - get_ans())/T)) >= 1.0/rand() ) {for(int j=1; j<=N; j++)ans[j] = tmp[j];res = get_ans();} else swap(nw[x], nw[y]);}if( res > real_res ) {real_res = res;for(int i=1; i<=N; i++)real_ans[i] = ans[i];FILE *f = fopen("drawer8.out", "w");for(int i=1; i<=N; i++)fprintf(f, "%d\n", real_ans[i]);printf("%d\n", real_res);}}
}

==可以注意到，我们在每一个函数前面加上了一个 _即将更新_，就代表这个函数依附于 GPU 运行。==

同时，我们尝试将变量也改用 GPU 显存，具体取决于你的电脑驱动了：

#include<cmath>
#include<ctime>
#include<queue>
#include<cstdio>
#include<cstdlib>
#include<algorithm>
#include<cuda.h>
#include<cuda_runtime.h>
using namespace std;

const int MAXN = 1000;    // size of the per-item arrays
const int MAXM = 300;     // size of the drawer dimension of val
const int INF = (1<<30);

// V1[i]: space item i needs; V2[j]: capacity of drawer j;
// val[i][j]: value of item i when stored in drawer j.
int V1[MAXN + 5], V2[MAXN + 5], val[MAXN + 5][MAXM + 5];
// nw: item order to pack; tmp: drawers assigned by the last get_ans() call;
// ans / res: assignment and value kept by the caller; N / M: item / drawer counts.
int ans[MAXN + 5], tmp[MAXN + 5], nw[MAXN + 5], res = 0, N, M;

// Greedy packer: takes the items in the order nw[1..N] and fills drawers
// 1..M in sequence.  When the next item does not fit, the packer moves on to
// the following drawer for good; once the drawers run out every remaining
// item is marked with drawer 0.  Writes tmp[] and returns the value collected.
int get_ans() {
    int value_sum = 0;
    int cur_drawer = 1;
    int space_left = V2[1];
    for (int k = 1; k <= N; ++k) {
        const int id = nw[k];
        // Advance until the item fits or no drawer is left.
        while (cur_drawer <= M && space_left < V1[id]) {
            ++cur_drawer;
            space_left = V2[cur_drawer];
        }
        if (cur_drawer <= M) {
            tmp[id] = cur_drawer;
            space_left -= V1[id];
            value_sum += val[id][cur_drawer];
        } else {
            tmp[id] = 0;
        }
    }
    return value_sum;
}
int real_ans[MAXN + 5], real_res = 0;
int main() {srand(time(NULL));freopen("drawer8.in", "r", stdin);scanf("%d%d", &N, &M);for(int i=1; i<=N; i++)scanf("%d", &V1[i]);for(int i=1; i<=M; i++)scanf("%d", &V2[i]);for(int i=1; i<=N; i++)for(int j=1; j<=M; j++)scanf("%d", &val[i][j]);while( true ) {for(int i=1; i<=N; i++)nw[i] = i;random_shuffle(nw + 1, nw + N + 1);res = get_ans();for(register double T=1E12; T>=1E-5; T*=0.986) {int x = rand() * 17 % N + 1, y = rand() * 17 % N + 1;swap(nw[x], nw[y]);if( get_ans() > res || 1.0/(1 + exp((res - get_ans())/T)) >= 1.0/rand() ) {for(int j=1; j<=N; j++)ans[j] = tmp[j];res = get_ans();} else swap(nw[x], nw[y]);}if( res > real_res ) {real_res = res;for(int i=1; i<=N; i++)real_ans[i] = ans[i];FILE *f = fopen("drawer8.out", "w");for(int i=1; i<=N; i++)fprintf(f, "%d\n", real_ans[i]);printf("%d\n", real_res);}}
}

你可以尝试运行；如果没报错，恭喜，可以跳过解决方案！