solution

包除原理。$O(NM)$。

$A$全体の総和を$s = \sum \sum A_{i, j}$、 行$i$の総和を$r_i = \sum A_{i, j}$、列$j$の総和を$c_j = \sum A_{i, j}$とすると、 beauty of $B$は$4s - 2 (r_1 + r_N + c_1 + c_M) + (A_{1, 1} + A_{1, M} + A_{N, 1} + A_{N, M})$。 残りは適当にやる。

implementation

#include <bits/stdc++.h>
#define REP(i, n) for (int i = 0; (i) < int(n); ++ (i))
#define ALL(x) begin(x), end(x)
using ll = long long;
using namespace std;
template <class T> inline void chmax(T & a, T const & b) { a = max(a, b); }
template <typename X, typename T> auto vectors(X x, T a) { return vector<T>(x, a); }
template <typename X, typename Y, typename Z, typename... Zs> auto vectors(X x, Y y, Z z, Zs... zs) { auto cont = vectors(y, z, zs...); return vector<decltype(cont)>(x, cont); }

int main() {
    // input
    int h, w; scanf("%d%d", &h, &w);
    auto a = vectors(h, w, int());
    REP (y, h) REP (x, w) {
        scanf("%d", &a[y][x]);
    }

    // solve
    vector<ll> row(h);
    REP (y, h) {
        row[y] = accumulate(ALL(a[y]), 0ll);
    }
    vector<ll> col(w);
    REP (x, w) {
        REP (y, h) {
            col[x] += a[y][x];
        }
    }
    ll sum_a = accumulate(ALL(row), 0ll);
    auto f = [&](int y1, int y2, int x1, int x2) {
        return 4 * sum_a
            - 2 * (row[y1] + row[y2] + col[x1] + col[x2])
            + (a[y1][x1] + a[y1][x2] + a[y2][x1] + a[y2][x2]);
    };
    ll result = f(0, h - 1, 0, w - 1);
    REP (y, h) {
        if (y !=     0) chmax(result, f(0,     y, 0, w - 1));
        if (y != h - 1) chmax(result, f(y, h - 1, 0, w - 1));
    }
    REP (x, w) {
        if (x !=     0) chmax(result, f(0, h - 1, 0,     x));
        if (x != w - 1) chmax(result, f(0, h - 1, x, w - 1));
    }

    // output
    printf("%lld\n", result);
    return 0;
}