Nov 14 2014

UVa 11600 - Masud Rana

Problem

給 N 個城市，城市之間會有 M 條當前的安全邊，在其餘邊皆有邪惡組織出現，來阻擋城市之間的運行。

現在人在編號 1，每次將隨機挑任何相鄰的城市走過去 (隔天又會再挑一個鄰近的城市)，如果走過去的路上遇到邪惡組織則會將其殲滅，請問期望值需要幾天才能將 N 個城市彼此之間都存有連通路徑。

Sample Input

Sample Output

1 2	Case 1: 1.000000 Case 2: 3.500000

Solution

特別小心輸出的誤差要在 1e-6，以為只需要輸出 1 位，結果一直 WA。

事實上，將狀態壓縮成每一個連通元件的節點個數，對於連通元件之間做拉邊即可。

對於狀態 u, v，期望值 E[u] = 1 + E[v] p + E[u] q，

E[v] * p 表示：將兩個不同連通元件之間拉邊，則會將兩個連通元件融合，機率 p。
E[u] * q 表示：在各自的連通元件內布拉邊，不影響狀態結果。

左移一下得到 E[u] = (1 + E[v] * p) / (1 - q);

這一題必須考慮現在位於哪個連通元件，所以特別標記狀態的第一個連通元件為當前所在位置。

#include <stdio.h> 
#include <vector>
#include <map>
#include <algorithm>
using namespace std;
// similar 1390 - Interconnect
int parent[32], weight[32];
int findp(int x) {
	return parent[x] == x ? x : findp(parent[x]);
}
void joint(int x, int y) {
	x = findp(x), y = findp(y);
	if (x == y)	return;
	if (weight[x] > weight[y])
		parent[y] = x, weight[x] += weight[y];
	else
		parent[x] = y, weight[y] += weight[x];
}
#define hash_mod 1000003
struct state {
	vector<int> c;
	unsigned int hash() {
		unsigned int a = 63689, b = 378551;
		unsigned int value = 0;
		for (int i = 0; i < c.size(); i++) {
			value = value * a + c[i];
			a *= b;
		}
		return value % hash_mod;
	}
	bool operator<(const state &a) const {
		if (c.size() != a.c.size())
			return c.size() < a.c.size();
		for (int i = 0; i < c.size(); i++)
			if (c[i] != a.c[i])
				return c[i] < a.c[i];
		return false;
	}
};
map<state, double> hash[hash_mod];
double dfs(state &u) {
	if (u.c.size() == 1)	return 0;
	sort(u.c.begin() + 1, u.c.end());
	int h = u.hash();
	if (hash[h].find(u) != hash[h].end())
		return hash[h][u];
	double &ret = hash[h][u];
	double self = 0, total = 0;
	ret = 0;
	for (int i = 0; i < u.c.size(); i++)
		total += u.c[i];
	total = total - 1, self = u.c[0] - 1;
	for (int i = 1; i < u.c.size(); i++) {
		state v = u;
		v.c[0] += v.c[i];
		v.c.erase(v.c.begin() + i);
		ret += u.c[i] * dfs(v);
	}
	// E[u] = 1 + E[v] * p + E[u] * q
	ret = (ret / total) + 1;
	ret = ret / (1 - self / total);
	return ret;
}
int main() {
	int n, m, x, y;
	int testcase, cases = 0;
	scanf("%d", &testcase);
	while (testcase--) {
		scanf("%d %d", &n, &m);
		for (int i = 1; i <= n; i++)
			parent[i] = i, weight[i] = 1;
		for (int i = 0; i < m; i++) {
			scanf("%d %d", &x, &y);
			joint(x, y);
		}
		
		state st;
		
		for (int i = 1; i <= n; i++) {
			if (parent[i] == i && parent[i] == findp(1))
				st.c.push_back(weight[i]);
		} 
		for (int i = 1; i <= n; i++) {
			if (parent[i] == i && parent[i] != findp(1)) // root
				st.c.push_back(weight[i]);
		}
		printf("Case %d: %lf\n", ++cases, dfs(st));
	}
	return 0;
}
/*
2
3 1
2 3
4 1
2 3
9999
3 3
1 2
2 3
3 1
*/

Read More +

Nov 14 2014

解題區►解題區 - UVa

UVa 11529 - Strange Tax Calculation

Problem

任挑三個點拉出三角形，三角形內部的點數期望值為何？

Sample Input

Sample Output

1 2	City 1: 0.20 City 2: 0.20

Solution

反過來寫，這一點會被多少個三角形包含住。

為了要計算這一個點 P 被多少三角形包含住，把這個點 P 當作基礎點，對其他點做極角排序。排序完套用極角掃描線算法，對於當前線上的點 Q，往回追溯 180 度內，任挑兩個點與其並成三角形，保證不包含 P。

那麼要得到包含 P 的就反過來用全部組合扣到不包含的結果。

因為要窮舉每一點做極角排序，作法會在 O(n^2 log n)。

#include <stdio.h> 
#include <math.h>
#include <algorithm>
using namespace std;
#define eps 1e-9
const double pi = acos(-1);
struct Pt {
	double x, y;
	double angle;
	Pt(double a = 0, double b = 0):x(a), y(b) {
		angle = atan2(b, a);
	}
	bool operator<(const Pt &a) const {
		if (fabs(angle - a.angle) > eps)
			return angle < a.angle;
		return false;
	}
}; 
long long C[1300][1300] = {};
long long getContainTriangle(int st, Pt D[], int n) {
	static Pt A[4096];
	int m = 0;
	for (int i = 0; i < n; i++) {
		if (i == st)	continue;
		A[m++] = Pt(D[i].x - D[st].x, D[i].y - D[st].y);
	}
	sort(A, A + m);
	for (int i = 0; i < m; i++)
		A[i + m] = A[i], A[i+m].angle += 2 * pi;
	long long ret = 0;
	for (int i = 0, j = 1; i < m; i++) { // point(i, ?, ?)
		while (A[j].angle - A[i].angle <= pi - eps)	j++;
		ret += C[j - i - 1][2];
	}
	return C[m][3] - ret;
}
int main() {
	C[0][0] = 1;
	for (int i = 0; i < 1205; i++) {
		C[i][0] = 1;
		for (int j = 1; j <= i; j++)
			C[i][j] = C[i-1][j-1] + C[i-1][j];
	}
	int n, cases = 0;
	Pt D[1500];
	while (scanf("%d", &n) == 1 && n) {
		for (int i = 0; i < n; i++) 
			scanf("%lf %lf", &D[i].x, &D[i].y);
		long long contain = 0;// contain
		for (int i = 0; i < n; i++)
			contain += getContainTriangle(i, D, n); // with i-th point.
		printf("City %d: %.2lf\n", ++cases, (double)contain / C[n][3]);
	}
	return 0;
}
/*
5
29 84
81 81
28 36
60 40
85 38
5
0 0
10 0
0 10
10 10
6 7
0
*/

Read More +

Nov 14 2014

解題區►解題區 - UVa

UVa 1331 - Minimax Triangulation

Problem

將一個簡單多邊形三角化，並且讓最大面積的三角形越小越好。

Sample Input

Sample Output

9.0

Solution

矩陣鏈乘積的方式著手 dp。

在此必須檢查劃分兩區間的三角形中內部不包含任何點，這裡利用外積 cross 進行計算。

#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <algorithm>
#include <math.h>
using namespace std;
#define eps 1e-6
struct Pt {
    double x, y;
    Pt(double a = 0, double b = 0):
    x(a), y(b) {}
    bool operator<(const Pt &a) const {
        if (fabs(x-a.x) > eps)
            return x < a.x;
        if (fabs(y-a.y) > eps)
            return y < a.y;
        return false;
    }
    Pt operator-(const Pt &a) const {
        return Pt(x - a.x, y - a.y);
    }
    Pt operator+(const Pt &a) const {
        return Pt(x + a.x, y + a.y);
    }
    bool operator==(const Pt &a) const {
    	return (fabs(x - a.x) < eps && fabs(y - a.y) < eps);
    }
    void read() {
        scanf("%lf %lf", &x, &y);
    }
};
struct Seg {
	Pt s, e;
	Seg(Pt a = Pt(), Pt b = Pt()):
		s(a), e(b) {}
};
double dist(Pt a, Pt b) {
	return hypot(a.x - b.x, a.y - b.y);
}
double dot(Pt a, Pt b) {
	return a.x * b.x + a.y * b.y;
}
double cross2(Pt a, Pt b) {
	return a.x * b.y - a.y * b.x;
}
double cross(Pt o, Pt a, Pt b) {
    return (a.x-o.x)*(b.y-o.y)-(a.y-o.y)*(b.x-o.x);
}
int between(Pt a, Pt b, Pt c) {
	return dot(c - a, b - a) >= 0 && dot(c - b, a - b) >= 0;
}
int onSeg(Pt a, Pt b, Pt c) {
	return between(a, b, c) && fabs(cross(a, b, c)) < eps;
}
int intersection(Pt as, Pt at, Pt bs, Pt bt) {
    if(cross(as, at, bs) * cross(as, at, bt) < 0 &&
       cross(at, as, bs) * cross(at, as, bt) < 0 &&
       cross(bs, bt, as) * cross(bs, bt, at) < 0 &&
       cross(bt, bs, as) * cross(bt, bs, at) < 0)
        return 1;
    return 0;
}
double distProjection(Pt as, Pt at, Pt s) {
	int a, b, c;
	a = at.y - as.y;
	b = as.x - at.x;
	c = - (a * as.x + b * as.y);
	return fabs(a * s.x + b * s.y + c) / hypot(a, b);
}
double ptToSeg(Seg seg, Pt p) {
	double c = 1e+30;
	if(between(seg.s, seg.e, p))
		c = min(c, distProjection(seg.s, seg.e, p));
	else
		c = min(c, min(dist(seg.s, p), dist(seg.e, p)));
	return c;
}
double getArea(Pt a, Pt b, Pt c) {
	return fabs(cross(a, b, c))/2.0;
}
int isEmptyTriangle(Pt a, Pt b, Pt c, Pt D[], int n) {
	for (int i = 0; i < n; i++) {
		if (D[i] == a || D[i] == b || D[i] == c)
			continue;
		if (cross(a, D[i], b) * cross(a, D[i], c) < 0 && 
			cross(b, D[i], a) * cross(b, D[i], c) < 0 &&
			cross(c, D[i], a) * cross(c, D[i], b) < 0)
			return 0;
	}
	return 1;
}
int main() {
	int testcase, n;
	Pt D[128];
	scanf("%d", &testcase);
	while (testcase--) {
		scanf("%d", &n);
		for (int i = 0; i < n; i++)
			D[i].read();
		for (int i = 0; i < n; i++)
			D[i + n] = D[i];
		double dp[128][128];
		for (int i = 0; i < 2 * n; i++) {
			for (int j = 0; j < 2 * n; j++) {
				dp[i][j] = 1e+30;
			}
			dp[i][i] = dp[i][i+1] = 0;
		}
		double ret = 1e+30;
		for (int i = 2; i < n; i++) {
			for (int j = 0; j < n; j++) {// [j, j+i]
				int l = j, r = j + i;
				double &v = dp[l][r];
				for (int k = l+1; k < r; k++) {
					if (isEmptyTriangle(D[l], D[r], D[k], D, n))
						v = min(v, max(max(dp[l][k], dp[k][r]), getArea(D[l], D[r], D[k])));
				}
			}
		}
		for (int i = 0; i < n; i++)
			ret = min(ret, dp[i][i + n - 1]);
		printf("%.1lf\n", ret);
	}
	return 0;
}
/*
1
6
7 0
6 2
9 5
3 5
0 3
1 1
*/

Read More +

Nov 12 2014

學校課程►通識課程

[通識] 文明的進程小考集

好幾題都 0 分，也許只是相性不合。而我也只是寫寫我的回答，沒有什麼標準答案。

Week 1

Brad 小學時的校長做了一件甚麼事情讓 Brad 終身難忘？
讓 Brad 參加音樂會，在音樂會的最後上台自白疾病的原因與如何面對，受到同學們的認同與理解。
Brad 的父親最後幫學校圖書館建書櫃，還送 Brad 甚麼當作和解的禮物？
一頂工地安全帽。
Brad 接電話時為什麼總是先說自己養了狗？
因為妥瑞症引起的聲音有如狗吠一般，先避免對方猜疑發生了什麼。
Brad 說他也有閱讀困難，是怎麼樣的困難？
無法控制自己的脖子不時擺動，使得無法長時間專注於書本內容。
Brad 對於進入那些公共場合有所顧忌？為什麼？
圖書館、教會、電影院、音樂會。因為那而的人們必須保持固定的儀態和禮貌。

Week 2

哪個特質不屬於現代心靈：(1) 顧忌他人觀感 (2) 喜怒形於色 (3) 觀察盤算對方。
(2) 喜怒形於色。(1)(3) 都有著抑制自我生物本能、顧及他人的模式，(2) 則沒有。
中世紀餐桌上最不常見的餐具是？ (1) 主餐盤 (2) 刀子 (3) 叉子
(3) 叉子。刀子與主餐盤為餐點必備的工具和器材，而叉子是用來區隔他人食物而存在，當時物質不豐裕的情況下，算是奢侈地存在。
為什麼 Erasmas 在禮儀書中判斷「用餐時胡吃海塞」是鄉巴佬的行為？
原本作答為 有如飢餓的野獸，為了區隔人與野獸的差別。 0 分
行為不得體，沒有顧及他人的觀感，鄉巴佬總是做這些事情。(應該類似於這種回答？)
人際互動的「文明化」如何有助於科學研究方法的發展？
原本作答為 文明化的互動反應物質生活與當時文化的流行，是一個直接的證據。 0 分
人際互動的行為反應社會階級結構，藉此了解結構進行研究。(應該類似於這種回答？)
越是自命文明的人就越容易對他人的舉止感到「不舒服」為什麼？
原本作答為 因為認為禮儀與自己相同才算文明，舉止不同的他人只會像野獸。 0 分
階級高低的差別反應在舉止，因此在對於不同階級的人感到厭惡，也等同於對別人行為感到不舒服。

Week 3

中世紀貴族用餐最可能出現的菜色是：牛排、無骨鴨胸肉、全羊？
全羊。當時還沒有食物幕後處理的概念，之後才有避見死相的道德感。
中世紀禮儀最不可能反映的是：身分階級、個人偏好、家庭背景？
個人偏好。當時禮儀與階級有關，不會涉及強調個人。
文藝復興禮儀書最不可能說的是：不可粗俗、注意衛生、迴避不雅？
原本作答為 衛生是後期科技進步，才用衛生理由取代粗俗的理由。 0 分
注意衛生。當時講就是顧及別人，從不可粗俗、迴避不雅中可以發現都在顧及別人的觀感。注意衛生是在民主平等後，階級不重要，顧及自己才更有約束力。
就食物的準備而言，為何西方人覺得中華文化的飲食方式很「文明」？
因為食物會經過多道手法，讓食用時看不出原貌，並且便於入口。不用吐骨。
這兩張「最後的晚餐」，哪張場景看起來年代較早？你的理由？
第二張較早，因為沒有個人的餐具。晚期的餐具才比較普及。

居然有藝術類型學生答題，根據畫風的不同來判斷年代 … 等，給這專業跪了，不過想必是 0 分 。

Week 4

文藝復興時期常常提醒人們「天使無所不在」，目的是什麼？
在不需要顧及他人的需求時，仍要遵守禮儀的理由。
禮儀書說，便後從廁所回到社交場合時，不能讓人看出洗過手，為什麼？
會讓場合的人因濕淋淋的手而想起廁所的那些汙穢。(備註：早期甚至希望不洗手。)
台北捷運地上劃的排隊等候線，其實不是 高度文明 的標誌，為什麼？
因為需要外物的約束，如何不需要線就能遵守，表示規則內化，那是更文明。
為何用禮儀書來提升自己氣質的人，反而暴露自己階級地位不高？
原本作答為 禮儀書的存在是在階級流動的幫手，教導低階級融入上層階級。 0 分
教導階級高的社為人士的行為模式，正因為需要學習，才表示自己不處於上層階級。
就 Elias 而言，為什麼社會越文明，其兒童與成人之間的差距就越大？
成人的禮儀要求越高，兒童在短時間內無法對百年禮儀融會貫通，而這只是相對差距。

Week 5

在課程 ppt 中，老師用多重比基尼泳裝的貓咪來嘲諷什麼？
原本作答為 沒有必要這麼遮遮掩掩，一個母親的象徵竟被當作情色。 0 分
過度地遮掩身體、過度道德化的結果。
以下何者不是社會歷史發展的結果？(1) 哈欠 (2) 洗手 (3) 掩鼻 (4) 遮體。為什麼？
打哈欠一直都是生物自然反應。其餘皆為後天環境影響而做的事情。但是打哈欠用手遮住也算是後天教育而成。
為什麼當前的清涼穿著其是不是世風日下，而是文明化成功的象徵。

世風日下：社會的風俗習慣日漸澆薄，每況愈下。(答題時根本不知道這個詞什麼意思。)

原本作答為 因為向別人暴露身體原本是羞恥的事，面對自己身體不怕外人注目，是心靈上的昇華，外表遮掩不是限制。 0 分 (生物本能不就是要遮掩自我弱點嗎？)
因為環境影響，代表治安好，不會有鹹豬手的人出現，因此才敢做出清涼穿著。
「人生：就是成人的生活」這個信念對於中世紀的兒童教育有何引響？
原本作答為 讓兒童失去原本應有的童年，更早步入成人階段，沒有純真無潔的思想，都被羞恥的教育而想太多什麼不能做的事情。 0 分
很多事情不能去做、不能去想。因此很多事情也不知道後果的嚴重性。
文明的 硬體技術 可以使得羞恥情感固定下來，請舉一個例子說明。

那時卡在硬體設備與軟體技術，兩個弄在一起時，一直想不到那是什麼鬼，於是在被迫交卷前亂填答案。結果只是單純講硬體設備。

廁所。利用大量的裝飾、氣氛，來隔絕與生物的排泄行為的聯想。

結語

被助教寫了 沒掌握本課程關鍵重點 、 沒有吸收 。我不否認，好幾堂課還在想算法分析。

Read More +

Nov 11 2014

解題區►解題區 - UVa

UVa 12058 - Highway Monitor

Problem

在 n 個點的高速公路上，需要監視 m 條邊上的情況，監視器只能位於點上，並且可以監控所有與該點相連的邊，費用有限最多只能放置 k 個監視器，找到其中一種使用少於等於 k 個監視器的方法。

Sample Input

Sample Output

1 2	Case #1: no Case #2: yes 1 2 5

Solution

解決最少點集覆蓋所有邊，是一個 NP 問題，雖然這一題已經給定限制搜索必須小於等於 K。

對於 DLX (Dancing Links and Algorithm X) 運用在最少重複覆蓋而言，K 是沒有太大的必要，事實上根據 degree 最大的節點開始窮舉的效果跟 DLX 差不多的。而這一題沒有要求最少，只是請求在範圍內的任一解輸出即可。

DLX 一開始用在精準覆蓋，解決重複覆蓋的效果只能說還算可以，大多的時間都花在計算估價函數，那個啟發式估價是用最簡單的貪心。有時候需要覆蓋的 colume 數量非常多，所以寫法上用 memset 也許是不錯的，但是用懶標記是更為快速。

DLX 的精神在於雙十字鏈表，並且每次找最少可能的 colume 開始窮舉，窮舉時會斷開所有相關在其他 colume 的可能，並在移除掉不需要窮舉的 colume，而在重複覆蓋上則不會移除掉需要窮舉的 colume。

精确覆盖：
首先选择当前要覆盖的列(含1最少的列)，将该列和能够覆盖到该列的行全部去掉，再枚举添加的方法。枚举某一行r，假设它是解集中的一个，那么该行所能覆盖到的所有列都不必再搜，所以删除该行覆盖到的所有列，又由于去掉的列相当于有解，所以能够覆盖到这些列的行也不用再搜，删之。

重复覆盖：
首先选择当前要覆盖的列(同上)，将该列删除，枚举覆盖到该列的所有行：对于某一行r，假设它是解集中的一个，那么该行所能覆盖到的列都不必再搜，所以删除该行覆盖到的所有列。注意此时不用删去覆盖到这些列的行，因为一列中允许有多个1。这里有一个A*的优化：估价函数h意义为从当前状态最好情况下需要添加几条边才能覆盖。

oshixiaoxiliuwww.cnblogs.com/jh818012/p/…

這一題會有 n 個 row，每一個 row 會覆蓋第 i 個節點覆蓋哪些邊。因此需要對所有輸入的邊進行編號，也就是 colume 會有 m 個。m 最大 50 萬，窮舉起來相當刺激。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
#include <algorithm>
#include <assert.h>
using namespace std;
#define MAXV 0x3f3f3f3f
#define MAXE 1048576
#define MAXC 1048576
#define MAXR 1024
struct DacingLinks {
    int left, right;
    int up, down;
    int ch, rh;
    int data; // extra info
} DL[MAXE];
int s[MAXC], o[MAXR], head, size, Ans, findflag;
void Remove(int c) {
    static int i;
    for(i = DL[c].down; i != c; i = DL[i].down) {
        DL[DL[i].right].left = DL[i].left;
        DL[DL[i].left].right = DL[i].right;
        s[DL[i].ch]--;
    }
}
void Resume(int c) {
    static int i;
    for(i = DL[c].down; i != c; i = DL[i].down) {
        DL[DL[i].right].left = i;
        DL[DL[i].left].right = i;
        s[DL[i].ch]++;
    }
}
int used[MAXC] = {};
int H() {
    static int c, ret, i, j, time = 0;
    for(c = DL[head].right, ++time, ret = 0; c != head; c = DL[c].right) {
        if(used[c] != time) {
            ret ++, used[c] = time;
            for(i = DL[c].down; i != c; i = DL[i].down)
                for(j = DL[i].right; j != i; j = DL[j].right)
                    used[DL[j].ch] = time;
        }
    }
    return ret;
}
void DFS(int k) {
    if(k + H() >= Ans)    return;
    if(DL[head].right == head) {
        if(k < Ans) {
        	Ans = k, findflag = 1;
			printf("yes");
			for (int i = 0; i < k; i++)
				printf(" %d", DL[o[i]].data);
			puts("");
        }
        return;
    }
    int t = MAXV, c, i, j;
    for(i = DL[head].right; i != head; i = DL[i].right) {
        if(s[i] < t) {
            t = s[i], c = i;
        }
    }
    for(i = DL[c].down; i != c; i = DL[i].down) {
        o[k] = i;
        Remove(i);
        for(j = DL[i].right; j != i; j = DL[j].right)	Remove(j);
        DFS(k+1);
        for(j = DL[i].left; j != i; j = DL[j].left)     Resume(j);
        Resume(i);
        if (findflag)	break;
    }
}
int new_node(int up, int down, int left, int right) {
	assert(size < MAXE);
    DL[size].up = up, DL[size].down = down;
    DL[size].left = left, DL[size].right = right;
    DL[up].down = DL[down].up = DL[left].right = DL[right].left = size;
    return size++;
}
void addrow(int n, int Row[], int data) {
    int a, r, row = -1, k;
    for(a = 0; a < n; a++) {
        r = Row[a];
        DL[size].ch = r, s[r]++;
        DL[size].data = data;
        if(row == -1) {
            row = new_node(DL[DL[r].ch].up, DL[r].ch, size, size);
            DL[row].rh = a;
        }else {
            k = new_node(DL[DL[r].ch].up, DL[r].ch, DL[row].left, row);
            DL[k].rh = a;
        }
    }
}
void init(int m) {
    size = 0;
    head = new_node(0, 0, 0, 0);
    int i;
    for(i = 1; i <= m; i++) {
        new_node(i, i, DL[head].left, head);
        DL[i].ch = i, s[i] = 0;
    }
} 
int main() {
	int testcase, cases = 0;
	int n, m, k, x, y;
	scanf("%d", &testcase);
	while (testcase--) {
		scanf("%d %d %d", &n, &m, &k);
		vector<int> g[1024];
		for (int i = 1; i <= m; i++) {
			scanf("%d %d", &x, &y);
			g[x].push_back(i);
			g[y].push_back(i);
		}
		assert(m < MAXC);
		init(m);
		int row[1024];
		for (int i = 1; i <= n; i++) {
			sort(g[i].begin(), g[i].end());
			for (int j = 0; j < g[i].size(); j++)
				row[j] = g[i][j];
			addrow(g[i].size(), row, i);
		}
		printf("Case #%d: ", ++cases);
		Ans = k + 1, findflag = 0;
		DFS(0);
		if (Ans == k+1)
			puts("no");
	}
	return 0;
}

Read More +

Nov 11 2014

解題區►解題區 - UVa

UVa 12052 - Cyber cafe

Problem

給一張圖有 N 個節點、M 條邊，其中要放置 S 台新主機於其中 S 個節點，而沒放置的節點繼續使用舊主機，為了服務舊主機的網咖，每個舊節點旁邊最多只能有一個新主機網咖。

請問有多少種放置方法。

Sample Input

Dhaka
6 2 7
AB BC CD DE EF FA AD
Chittagong
4 3 4
AB AC AD BC
Sylhet
3 2 3
AB BC CA
TheEnd

Sample Output

Dhaka
9
Chittagong
1
Sylhet
0
TheEnd

Solution

對於每一個節點的連接方式用一個 32-bit 壓縮，接著窮舉所有可能放置方法 state，對於沒有放置的節點，直接壓縮結果與 state 做 AND 運算，算 bits 數是否都小於 2。

#include <stdio.h>
#include <string.h>
#include <algorithm>
using namespace std;
int main() {
	char ss[1024];
	int n, s, p;
	while (scanf("%s", &ss) == 1) {
		printf("%s\n", ss);
		if (!strcmp(ss, "TheEnd"))
			break;
		scanf("%d %d %d", &n, &s, &p);
		int g[26][26] = {};
		for (int i = 0; i < p; i++) {
			scanf("%s", ss);
			int x = ss[0] - 'A', y = ss[1] - 'A';
			g[x][y] = g[y][x] = 1;
		}
		int mask[26] = {};
		for (int i = 0; i < n; i++)
			for (int j = 0; j < n; j++)
				mask[i] |= g[i][j]<<j;
		int ret = 0;
		for (int i = (1<<n)-1; i >= 0; i--) {
			if (__builtin_popcount(i) == s) {
				int ok = 1;
				for (int j = 0; j < n && ok; j++) {
					if ((i>>j)&1)	continue;
					if (__builtin_popcount(mask[j]&i) >= 2)
						ok = 0;
				}
				ret += ok;
			}
		}
		printf("%d\n", ret);
	}
	return 0;
}

Read More +

Nov 11 2014

解題區►解題區 - UVa

UVa 12051 - Mazes in Higher Dimensions

Problem

好比 2D 方格，從左下角到右上角的方法數。而這一題是在三維空間，一樣的方式要越來越靠近右上方的頂點，但是部分點會有障礙物，會造成無法通行。

題目給定的維度事實上是終點座標，而非該維度數，用 row major 的時候要計算 dim[i]+1。

Sample Input

Sample Output

1 2	Case 1: 756756 Case 2: 6318655200

Solution

特地用懶標記完成 dp 計算，但是忘了有可能無法抵達終點，忘了補上最對於終點的懶標記操作，直接輸出 dp[ret] 結果一直噴 WA。

懶標記檢查 if (used[ret] != testcase) dp[ret] = 0;。

#include <stdio.h> 
#include <string.h>
#include <algorithm>
#include <queue>
using namespace std;
#define MAXN 3000005
long long dp[MAXN] = {};
int used[MAXN] = {}, ob[MAXN] = {}; // obstacle
int main() {
	memset(dp, 0, sizeof(dp));
	memset(ob, 0, sizeof(ob));
	memset(used, 0, sizeof(used));
	int n, m, dim[16], row[16], v[16];
	int testcase = 0, cases = 0;
	while (scanf("%d %d", &n, &m) == 2 && n+m) {
		for (int i = 0; i < n; i++) {
			scanf("%d", &dim[i]);
			dim[i]++;
		}
		testcase++;
		row[n - 1] = 1;
		for (int i = n - 2; i >= 0; i--)
			row[i] = row[i+1] * dim[i+1];
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++)
				scanf("%d", &v[j]);
			int x = 0;
			for (int j = 0; j < n; j++)
				x += v[j] * row[j];
			ob[x] = testcase;
		}
		if(ob[0] == testcase){
            printf("Case %d: 0\n", ++cases);
            continue;
        }
		int ret = 0;
		for (int i = 0; i < n; i++)
			ret += (dim[i] - 1) * row[i];
		queue<int> Q;
		Q.push(0), used[0] = testcase;
		dp[0] = 1;
		while (!Q.empty()) {
			int x = Q.front();
			Q.pop();
			long long ways = dp[x];
			for (int i = 0; i < n; i++)
				v[i] = x / row[i], x %= row[i];
			for (int i = 0; i < n; i++) {
				v[i]++;
				if (v[i] < dim[i]) {
					x = 0;
					for (int j = 0; j < n; j++)
						x += v[j] * row[j];
					if (ob[x] != testcase) {
						if (used[x] != testcase)
							dp[x] = 0;
						dp[x] += ways;
						if (used[x] != testcase)
							Q.push(x), used[x] = testcase;
					}
				}
				v[i]--;
			}
		}
		if (used[ret] != testcase)	dp[ret] = 0;
		printf("Case %d: %lld\n", ++cases, dp[ret]);
	}
	return 0;
}
/*
3 0 
5 5 5
 
3 1 
9 8 7
1 1 1
3 2
9 8 7
1 1 1
5 6 6
3 2
9 8 7
2 3 4
9 8 7
3 2
9 8 7
2 3 4
9 8 7
0 0
*/

Read More +

Nov 11 2014

解題區►解題區 - UVa

UVa 10381 - The Rock

Problem

給定一個 N x M 地圖，已知有部分的牆壁和一個透明的牆壁，每一個障礙物都佔據一個方格，而透明牆壁一開始不知道位於何處，直到碰壁的時候才發現，找一條最慘情況下的最短路徑。

Sample Input

2
8 8
..X.....
........
##.##...
........
##.##...
##.##...
........
..E.....
 
3 4
..X.
.##.
.E..

Sample Output

1
2

15
11

Solution

最小化最大值，首先窮舉任何一個透明牆壁可能出現的地方，對於透明牆壁的鄰近四格建立再碰壁方向下，抵達終點的最短路徑。

接著，使用最短路的方式進行更新，在這裡使用 dijkstra 的方式，對於每一個點的狀態為 (走到這裡第幾步) = 從起點走到這，最慘的偏移路徑最小化 為何。

概念上理解，設計一條路徑，路徑畫好後，對於每一點賭下一個點就是透明牆壁，偏移路徑長最長為何就會是該路徑的花費。

#include <stdio.h>
#include <string.h>
#include <queue>
#include <assert.h>
#include <algorithm>
using namespace std;
char g[64][64];
int dist[64][64], walldist[64][64][4];
int n, m;
const int dx[] = {0, 1, 0, -1};
const int dy[] = {1, 0, -1, 0};
struct State {
	int x, y;
	int g, h;
	State(int a=0, int b=0, int c=0, int d=0):
		x(a), y(b), g(c), h(d) {}
	bool operator<(const State &a) const {
		return h > a.h;
	}
};
int dp[40][40][40 * 40];
int search(int sx, int sy, int ex, int ey) {
	priority_queue<State> pQ;
	State u, v;
	int tx, ty, h;
	pQ.push(State(sx, sy, 0, 0));
	memset(dp, 63, sizeof(dp));
	while (!pQ.empty()) {
		u = pQ.top(), pQ.pop();
		if (u.h > dp[u.x][u.y][u.g])
			continue;
		if (u.x == ex && u.y == ey)
			return u.h;
		for (int i = 0; i < 4; i++) {
			tx = u.x + dx[i], ty = u.y + dy[i];
			if (tx < 0 || ty < 0 || tx >= n || ty >= m)
				continue;
			if (g[tx][ty] == '#' || g[tx][ty] == 'E')
				continue;
			if (g[tx][ty] == '.') 
				h = max(u.h, u.g + walldist[u.x][u.y][i]);
			else if (g[tx][ty] == 'X')
				h = max(u.h, u.g);
			assert(u.g + 1 < 40 * 40);
			if (dp[tx][ty][u.g + 1] > h) {
				dp[tx][ty][u.g + 1] = h;
				pQ.push(State(tx, ty, u.g + 1, h));
			}
		}
	}
	return 0;
}
void bfs(int sx, int sy) {
	memset(dist, 63, sizeof(dist));
	queue<int> X, Y;
	int tx, ty;
	X.push(sx), Y.push(sy);
	dist[sx][sy] = 0;
	while (!X.empty()) {
		sx = X.front(), X.pop();
		sy = Y.front(), Y.pop();
		for (int i = 0; i < 4; i++) {
			tx = sx + dx[i], ty = sy + dy[i];
			if (tx < 0 || ty < 0 || tx >= n || ty >= m)
				continue;
			if (g[tx][ty] == '#' || dist[tx][ty] <= dist[sx][sy] + 1)
				continue;
			dist[tx][ty] = dist[sx][sy] + 1;
			X.push(tx), Y.push(ty);
		}
	}
}
int main() {
	int testcase;
	scanf("%d", &testcase);
	while (testcase--) {
		scanf("%d %d", &n, &m);
		for (int i = 0; i < n; i++)
			scanf("%s", g[i]);
		int sx, sy, ex, ey, tx, ty;
		
		memset(walldist, 0, sizeof(walldist));
		for (int i = 0; i < n; i++) {
			for (int j = 0; j < m; j++) {
				if (g[i][j] == 'E')	sx = i, sy = j;
				if (g[i][j] == 'X')	ex = i, ey = j;				
			}
		}
		for (int i = 0; i < n; i++) {
			for (int j = 0; j < m; j++) {
				if (g[i][j] != '.')
					continue;
				g[i][j] = '#';
				bfs(ex, ey);
				for (int k = 0; k < 4; k++) {
					tx = i + dx[k], ty = j + dy[k];
					if (tx < 0 || ty < 0 || tx >= n || ty >= m)
						continue;
					if (g[tx][ty] == '#')
						continue;
					walldist[tx][ty][(k+2)%4] = dist[tx][ty];
				}
				g[i][j] = '.';
			}
		}
		int ret = search(sx, sy, ex, ey);
		printf("%d\n", ret);
	}
	return 0;
}
/*
2
8 8
..X.....
........
##.##...
........
##.##...
##.##...
........
..E.....
 
3 4
..X.
.##.
.E..
*/

Read More +

Nov 10 2014

學校課程►自然語言

自然語言處理 - HW02

編譯環境

Dev-C++ 5.6.3

作業內容

給予 5000 筆的亞馬遜書店評論，每一個評論都已經分類好，總共有五種類型的書評，分別為 book, dvd, health, music, 以及 toys_games。

利用這 5000 筆分類好的書評寫一個分類器，接著讀入另外測試資料，測試資料每一個書評有既定分類，使用分類器判斷是否符合既定分類，分別計算對於每一種分類書評的準確度 (Accuracy) 和精準度 (Precision)。

準確度：接近正確答案的機率，在這裡可以理解為分類出來的結果等於實際結果的機率。
精準度：測試結果一致的機率，在這裡可以理解為分類出來的結果中，實際上是正確的機率。(是否集中於某個實際結果)

還有一個混用準確度和精準度的概念 f，假設準確度為 R，精準度為 P。

$$F = \frac{1}{\alpha \frac{1}{P} + (1 - \alpha) \frac{1}{R}} = \frac{(\beta^{2} + 1) PR}{\beta^{2}P+R}$$

所謂的 F1 定義為 $\beta = 1, \alpha = 1/2$，最後得到 $F = 2PR / (P+R)$

wikien.wikipedia.org/wiki/…

關於輸入輸出資料

gold_standard.xml ：黃金標準的 5000 筆評論
test_outcome.xml ：原本認為是 測試用的反饋資料，用來檢測寫的分類器好不好。 後更正為助教的分類器跑出來的結果。

通常會拿到一大筆資料，經驗上 3/4 的資料會拿來訓練，剩下 1/4 會拿來檢測。也就是資料量於 3 : 1，至於一段資料怎麼切分有很多方式，可以擷取中間一段的 1/4 … 等都可以。

看一下 xml 格式

gold_standard.xml

<RDF rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <Text category="music">Forget the fact that despite the grumbling
  about today's "music" ...</Text>
  <Text category="health">Printed all over the box are the words
  "with infrared heat". ...</Text>
  <Text category="music">I just finished seeing the movie and loved
  the song at the end ...</Text>
  ...

看起來最多兩層，如果不用插件來解析 XML，遞迴處理寫一個 parser 應該不是問題。

於是就這麼蠻幹 XML parser 下去，仔細查閱一下給定的 XML，會發現文件中有 &amp; 和 &amp;quot; 的確在 XML 的元素 content 不會出現 >, < … 等字元，就跟 HTML 的 character entities 一樣都需要被替換成特殊的顯示規格。但真沒有見過上述的那兩個格式，仔細檢查發現應該是對應 and 字串和 " 引號。解析完作 replace 動作。

回過頭來看這次所需要的公式：

$$P(c) = \frac{N_{c}}{N} \\ P(w|c) = \frac{count(w,c)+1}{count(c)+|V|} \\ P(c|s) = P(c) \prod P(w_{i}|c)$$

參數說明：

$P(c)$ 表示該分類佔有群體的機率，也就是在 5000 筆評論中，分類 c 佔有百分比。$N_{c}$ 表示有多少筆評論屬於 c 分類。$N$ 表示評論筆數，這裡已知 $N = 5000$
$P(w| c)$ 單詞 w 在分類 c 的出現機率為何，$count(w,c)$ 單詞 w 在分類 c 中出現的次數，$count(c)$ 屬於 c 分類中字詞總數 (評論總共有多少字)，$| V|$ 分類 c 中使用的單詞集合大小。
$P(c| s)$ 評論句子 s 在分類 c 的機率為何。

隨後找 $P(c| s)$ 機率值最大作為分類依準。

最後要找到分類是否正確，對於每一種分類 c 會得到表格

實際結果\分類結果	Classifier no	Classifier yes
Truth no	true negative(TN)	false positive(FP)
Truth yes	false negative(FN)	true positive(TP)

準確度 R 意即：TP / (TP + FN)、精準度 P 意即：TP / (TP + FP)

對於 Micro-average 和 Macro-average 的計算，Micro 和 Macro 概念上我理解程式幾何平均和算數平均的概念，Micro 會把每個分類 c 的表單結果累加到一個表格，隨後按照一般分類算法計算準確和精準度，而 Macro 則是將每個分類算完的結果取算術平均。

代碼

傻傻地用分類器判斷與助教的決策比較的運行結果。

process gold_standard.xml ...
statistics model built ...
--------------------+----------
book                |      1000
dvd                 |      1000
health              |      1000
music               |      1000
toys_games          |      1000
--------------------+----------
process test_outcome.xml ...
testing test_outcome.xml ...
| AC\Assign|      book|       dvd|    health|     music|toys_games|
|      book|       892|        29|        17|         9|        25|
|       dvd|        22|       829|        16|        48|        41|
|    health|        30|        19|       975|        46|       177|
|     music|         5|        13|        12|       870|        28|
|toys_games|        10|        16|        43|        21|       807|
book
        p :0.9301355579
        r :0.9176954733
        f :0.9238736406
dvd
        p :0.9150110375
        r :0.8671548117
        f :0.8904403867
health
        p :0.9172154280
        r :0.7818765036
        f :0.8441558442
music
        p :0.8752515091
        r :0.9375000000
        f :0.9053069719
toys_games
        p :0.7486085343
        r :0.8996655518
        f :0.8172151899
Micro-average
        p :0.8746000000
        r :0.8746000000
        f :0.8746000000
Macro-average
        p :0.8772444134
        r :0.8807784681
        f :0.8790078886
--------------------------------
Process exited after 6.545 seconds with return value 0

後來發現是公式理解錯誤

真陽性 (TP, true positive)
正確的肯定。又稱：命中 (hit)

真陰性 (TN, true negative)
正確的否定。又稱：正確拒絕 (correct rejection)

偽陽性 (FP, false positive)
錯誤的肯定，又稱：假警報 (false alarm)，第一型錯誤

偽陰性 (FN, false negative)
錯誤的否定，又稱：未命中 (miss)，第二型錯誤

wikizh.wikipedia.org/wiki/…

準確度就是在正確答案中找誤判，而精準度就是在判斷正確下找錯誤。

後來實驗用 gold_standard.xml train 和 test，運行結果為

| AC\Assign|      book|       dvd|    health|     music|toys_games|
|      book|       950|         7|        15|         3|        25|
|       dvd|         7|       895|        33|        21|        44|
|    health|         1|         0|       984|         1|        14|
|     music|         1|         3|        15|       968|        13|
|toys_games|         0|         1|        16|         1|       982|
book
        p :0.9906152242
        r :0.9500000000
        f :0.9698825932
dvd
        p :0.9878587196
        r :0.8950000000
        f :0.9391395593
health
        p :0.9256820320
        r :0.9840000000
        f :0.9539505574
music
        p :0.9738430584
        r :0.9680000000
        f :0.9709127382
toys_games
        p :0.9109461967
        r :0.9820000000
        f :0.9451395573
Micro-average
        p :0.9558000000
        r :0.9558000000
        f :0.9558000000
Macro-average
        p :0.9577890462
        r :0.9558000000
        f :0.9567934893

當然不能拿相同的 train 和 test data 來用，但是也明顯地發現，這個 model 仍然沒有辦法達到很高純度的結果，在部分比例中也只達到 90% 的精度。

那有沒有一種情況，我們將六種分類的共同交集字符給去除？這樣的效果會不會比較好呢？例如常用的 I、think、it … 等，在 model 中直接無視這些常用字集的效果如何呢？

void custom() {
	set<string> common;
	int first_set = 1;
	for (map<string, map<string, int> >::iterator it = categories.begin();
		it != categories.end(); it++) {
		map<string, int> &R = it->second;
		set<string> s;
		for (map<string, int>::iterator jt = R.begin(); 
			jt != R.end(); jt++)
			s.insert(jt->first);
		if (first_set) 	common = s, first_set = 0;
		else {
			set<string> C;
			set_intersection(common.begin(), common.end(), s.begin(), s.end(), inserter(C, C.begin()));
			common = C;
		}
	}
	for (map<string, map<string, int> >::iterator it = categories.begin();
		it != categories.end(); it++) {
		map<string, int> &R = it->second;
		for (set<string>::iterator jt = common.begin();
			jt != common.end(); jt++)
			R.erase(*jt);
		int cnt = 0;
		for (map<string, int>::iterator jt = R.begin();
			jt != R.end(); jt++)
			cnt += jt->second;
		count_category[it->first] = cnt;
	}
}

上述代碼為將每一個分類的字集與共同交集，並且將共同交集從每個分類中移除，同時將記數重新統計。根據實驗結果，很多分類都直接噴掉，也許是專有名詞使用的量太少，雖然每個字集仍然有上千上萬不等，但是在分類效應上某些分類完全消失。效果差到不行，所以就直接無視吧。或者提供點意見吧。

回過頭來解一下關於作業內容，其實作業內容可以化簡為：

輸入只有一組測資，該組測資會有數行，每一行上會有兩個分類結果，第一個分類結果為標準答案，第二個分類結果為根據模型運算的結果。對於測資輸出每一個分類的準確度、精準度以及 f1 的數據結果。

兩個輸入檔案可以壓縮為

Input

5000
music music
book health
music music
toys_games book 
music music
... here are more rows

很明顯地，第二個評論中，誤把 book 類型判斷成 health。

Output

| AC\Assign|      book|       dvd|    health|     music|toys_games|
|      book|       907|        25|        48|         7|        13|
|       dvd|        35|       873|        43|        24|        25|
|    health|        10|         2|       932|        11|        45|
|     music|         9|        46|        56|       865|        24|
|toys_games|        11|        10|       168|        21|       790|
book
        p :0.9331275720
        r :0.9070000000
        f :0.9198782961
dvd
        p :0.9131799163
        r :0.8730000000
        f :0.8926380368
health
        p :0.7473937450
        r :0.9320000000
        f :0.8295505118
music
        p :0.9321120690
        r :0.8650000000
        f :0.8973029046
toys_games
        p :0.8807134894
        r :0.7900000000
        f :0.8328940432
Micro-average
        p :0.8734000000
        r :0.8734000000
        f :0.8734000000
Macro-average
        p :0.8813053583
        r :0.8734000000
        f :0.8773348714
--------------------------------
Process exited after 1.736 seconds with return value 0

答案算出來當然跟助教一下啊，知道什麼是 sample output 嗎？在 ACMer 的眼中，代表根據算法不同，答案也許會有誤差，或者是測資不同造成答案不同。而 sample output 通常告訴我們的是輸出格式與可能情況，而非測資的正確答案。

我就是很笨，無法理解這個世界。不知道那檔案是有序對應，看到 attribute 名稱 name 一樣，內容 content 不一樣就自動補完無法理解的英文部分，而沒有去看 tag 中 inner content 內容是相同的。

#include <stdio.h>
#include <map>
#include <vector>
#include <iostream>
#include <fstream>
#include <assert.h>
#include <sstream>
#include <math.h>
#include <ctype.h>
#include <string>
#include <string.h>
#include <set>
#include <algorithm>
using namespace std;
class XMLParser {
	public:
	struct Node {
		string tag_name, content;
		map<string, string> attr;
		vector<Node> child;
	};
	Node root; 
	XMLParser(string text = "") {
		sin << text;
		root = Node();
		build(root, "");
	}	
	void replaceAll(std::string& str, const std::string& from, const std::string& to) {
    	if(from.empty())
        	return;
    	size_t start_pos = 0;
    	while((start_pos = str.find(from, start_pos)) != std::string::npos) {
        	str.replace(start_pos, from.length(), to);
        	start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx'
    	}
	}
	string html2text(string s) {
		string ret(s);
		replaceAll(ret, "&amp;amp;quot;", "\"");
		replaceAll(ret, "&amp;amp;", "and");
		return ret;
	}
	private:
	stringstream sin;
	void getTagContent(Node &u) {
		char c, div;
		while (sin.get(c)) {
			if (c == '<') {
				while (sin.get(c) && isspace(c));
				u.tag_name = c;
				while (sin.get(c) && c != '>' && !isspace(c))
					u.tag_name += c;
				if (c == '>')	return;
				while (true) {
					while (sin.get(c) && isspace(c));
					if (c == '>')	return;
					string a = "", b = "";
					a += c;
					while (sin.get(c) && !isspace(c) && c != '=')
						a += c;
					while (sin.get(c) && (isspace(c) || c == '"' || c == '\'')) div = c;
					b += c;
					while (sin.get(c) && !isspace(c) && c != div)
						b += c;
					u.attr[a] = b;
				}
				return;
			}
		}
	}
	int build(Node &u, string last) {
		getTagContent(u);
		if (u.tag_name == last)
			return 0;
		while (sin.peek() != EOF && sin.peek() != '<') {
			char c;
			sin.get(c);
			u.content += c;
		}
		u.content = html2text(u.content);
		while (sin.peek() != EOF) {
			while (sin.peek() != EOF && isspace(sin.peek()))
				sin.get();
			if (sin.peek() == '<') {
				u.child.push_back(Node());
				if (!build(u.child[u.child.size() - 1], "/" + u.tag_name)) {
					u.child.pop_back();
					break;
				}
			}
		}	
		return 1;	
	}
};
class StatsModel {
	public: 
		map<string, map<string, int> > categories; 	// count(w, c)
		map<string, int > commentCount; 			// N_{c}
		map<string, int > count_category; 			// count(c)
		map<string, map<string, int> > judgeTable;
		int N, V;
		int microtable[2][2]; // [truth 0:1][classifier 0:1]
		StatsModel() {
			N = V = 0;
			memset(microtable, 0, sizeof(microtable));
		}
		vector<string> normalSentence(string content) {
			vector<string> w;
			stringstream sin(content);
			string token;
			while (sin >> token) {
				token = igntrim(token);
				token = str2lower(token);
				if (token.length() > 0)
					w.push_back(token);
			}
			return w;
		}
		void recordJudge(string truth, string classifier) {
			judgeTable[truth][classifier]++;
			for (map<string, int>::iterator it = commentCount.begin();
				it != commentCount.end(); it++) {
				int t = (it->first == truth);
				int c = (it->first == classifier);
				microtable[t][c]++;
			}
		}
		string judgeComment(string category, string content) {
			vector<string> w = normalSentence(content);
			double Pc, P;
			double maxPwc = - 1e+30;
			string chooseClass = "";
			for (map<string, int>::iterator it = commentCount.begin();
				it != commentCount.end(); it++) {
				Pc = (double)it->second / N;
				P = log(Pc);
				map<string, int> &R = categories[it->first];
				int count_c = count_category[it->first], count_w_c;
				for (int i = 0; i < w.size(); i++) {
					count_w_c = 0;
					if (R.find(w[i]) != R.end())
						count_w_c = R[w[i]];
					P += log((double) (count_w_c + 1)/ (count_c + R.size()));
				}
				if (P > maxPwc) 
					maxPwc = P, chooseClass = it->first;
			}
			recordJudge(category, chooseClass);
			return chooseClass;
		}
		void addComment(string category, string content) {
			commentCount[category]++, N++;
			map<string, int> &S = categories[category];
			vector<string> w = normalSentence(content);
			for (int i = 0; i < w.size(); i++)
				S[w[i]]++;
			count_category[category] += w.size(), V += w.size();
		}
		string igntrim(string s) {
			while (s.size() > 0) {
				if (isspace(s[0]) || s[0] == '.' || s[0] == ',' 
					|| s[0] == '!' || s[0] == '"' || s[0] == '\'')
					s = s.substr(1);
				else {
					int t = s.length();
					if (isspace(s[t-1]) || s[t-1] == '.' || s[t-1] == ',' 
						|| s[t-1] == '!' || s[t-1] == '"')
						s = s.substr(0, t-1);
					else
						return s;
				}
			}
			return s;
		}
		string str2lower(string s) {
			for (int i = 0; i < s.length(); i++)
				s[i] = tolower(s[i]);
			return s;
		}
		void custom() {
			set<string> common;
			int first_set = 1;
			for (map<string, map<string, int> >::iterator it = categories.begin();
				it != categories.end(); it++) {
				map<string, int> &R = it->second;
				set<string> s;
				for (map<string, int>::iterator jt = R.begin(); 
					jt != R.end(); jt++)
					s.insert(jt->first);
				if (first_set) 	common = s, first_set = 0;
				else {
					set<string> C;
					set_intersection(common.begin(), common.end(), s.begin(), s.end(), inserter(C, C.begin()));
					common = C;
				}
			}
			for (map<string, map<string, int> >::iterator it = categories.begin();
				it != categories.end(); it++) {
				map<string, int> &R = it->second;
				for (set<string>::iterator jt = common.begin();
					jt != common.end(); jt++)
					R.erase(*jt);
				int cnt = 0;
				for (map<string, int>::iterator jt = R.begin();
					jt != R.end(); jt++)
					cnt += jt->second;
//				printf("%d -> %d\n", count_category[it->first], cnt);
				count_category[it->first] = cnt;
			}
//			printf("%d\n", common.size());
		}
		void print() {
			printf("%-20s+%10s\n", string(20, '-').c_str(), string(10, '-').c_str());
			for (map<string, int>::iterator it = commentCount.begin();
				it != commentCount.end(); it++) {
				printf("%-20s|%10d\n", it->first.c_str(), it->second);
			}
			printf("%-20s+%10s\n", string(20, '-').c_str(), string(10, '-').c_str());
		}
		void report() {
			// print <judge table>
			printf("|%10s|", "AC\\Assign");
			for (map<string, int>::iterator it = commentCount.begin();
				it != commentCount.end(); it++)
				printf("%10s|", it->first.c_str());
			puts("");
			for (map<string, int>::iterator it = commentCount.begin();
				it != commentCount.end(); it++) {
				printf("|%10s|", it->first.c_str());
				for (map<string, int>::iterator jt = commentCount.begin();
					jt != commentCount.end(); jt++) {
					printf("%10d|", judgeTable[it->first][jt->first]);
				}
				puts("");
			}
			// homework format
			// recall: fraction of docs in class i classified correctly
			// precision: fraction of docs assigned class i that are actually about class i
			const double beta = 1;
			double micro_r = 0, micro_p = 0, macro_r = 0, macro_p = 0, f1;
			for (map<string, int>::iterator it = commentCount.begin();
				it != commentCount.end(); it++) {
				double recall = 0, precision = 0, f1 = 0;
				int sumr = 0, sump = 0;
				for (map<string, int>::iterator jt = commentCount.begin();
					jt != commentCount.end(); jt++) {
					sumr += judgeTable[it->first][jt->first];
					sump += judgeTable[jt->first][it->first];
				}
				recall = (double) judgeTable[it->first][it->first] / sumr;
				precision = (double) judgeTable[it->first][it->first] / sump;
				f1 = (beta * beta + 1) * precision * recall / (beta * beta * precision + recall);
				
				macro_r += recall, macro_p += precision;
				
				printf("%s\n", it->first.c_str());
				printf("%9s :%.10lf\n", "p", precision);
				printf("%9s :%.10lf\n", "r", recall);
				printf("%9s :%.10lf\n", "f", f1);
			}
//			for (int i = 0; i < 2; i++, puts("")) 
//				for (int j = 0; j < 2; j++)
//					printf("%5d ", microtable[i][j]);
			// [truth 0:1][classifier 0:1] = { {TN, FP}, {FN, TP} } 
			micro_r = (double) microtable[1][1] / (microtable[1][1] + microtable[1][0]); // TP / (TP + FN)
			micro_p = (double) microtable[1][1] / (microtable[1][1] + microtable[1][0]); // TP / (TP + FP)
			f1 = (beta * beta + 1) * micro_p * micro_r / (beta * beta * micro_p + micro_r);
			printf("%s\n", "Micro-average");
			printf("%9s :%.10lf\n", "p", micro_p);
			printf("%9s :%.10lf\n", "r", micro_r);
			printf("%9s :%.10lf\n", "f", f1);
			
			macro_r /= commentCount.size();
			macro_p /= commentCount.size();
			f1 = (beta * beta + 1) * macro_p * macro_r / (beta * beta * macro_p + macro_r);
			printf("%s\n", "Macro-average");
			printf("%9s :%.10lf\n", "p", macro_p);
			printf("%9s :%.10lf\n", "r", macro_r);
			printf("%9s :%.10lf\n", "f", f1);
		}
};
int main() {
	ifstream fin("gold_standard.xml");
	string xml = "", line;
	while (getline(fin, line))
		xml += "\n" + line;
	printf("process gold_standard.xml ...\n");
	
	XMLParser xmlDoc(xml);
	StatsModel statsModel;
	
	printf("statistics model built ...\n");
	for (int i = 0; i < xmlDoc.root.child.size(); i++) {
		string 	a = xmlDoc.root.child[i].attr["category"],
				b = xmlDoc.root.child[i].content;
		statsModel.addComment(a, b);
	}
	
	statsModel.print();
//	statsModel.custom();
	
	ifstream tin("test_outcome.xml");
	
	xml = "";
	while (getline(tin, line))
		xml += "\n" + line;
	printf("process test_outcome.xml ...\n");
	
	XMLParser testDoc(xml);
	
	printf("testing test_outcome.xml ...\n");
	for (int i = 0; i < testDoc.root.child.size(); i++) {
		statsModel.recordJudge(xmlDoc.root.child[i].attr["category"], testDoc.root.child[i].attr["category"]);
	}
	
	statsModel.report();
	
	return 0;
}

Read More +

Nov 9 2014

手札日記

遺書

寫給突然逝去的自己，或者忘記自己的自己。

寫這篇時正值 11 月，剛過完 21 歲的生日一個多星期，從意識到畢業的存在時，就覺得那等待畢業的日子跟死亡倒數的日子一樣，有人快樂畢業結束大學，同時也有不少人一同結束使命。

瞻望向前，眼前不是一片光明與希望，只有曾經失敗的目標一度叫我爬去，挑戰是不錯的，但是在大多數人都不將其視為挑戰時，挑戰對我而言就是個懲罰、是個罪。有人會說看看別處，那處又是什麼？已經把現有的資源都鋪在地上，已經沒有辦法前往那兒。那為什麼當初這麼傻呢？的確因為笨，所以才這麼走。

《境界的彼方》從出生到現在一直是緩刑

現在大四搬出來住外面，單純沒有抽到宿舍住，大部分時間跟電腦為伴， 每餐吃飼料維生 ，走路要花半小時，來回一趟就快一個小時，人啊為什麼要吃飯？的確在沒得吃的環境下，這麼想是非常奢侈的。每天從早上起來，照著日常習慣度過一個早上、下午、晚上，走到走廊上裝水一個人都沒有，明明整排有快 20 間房間，卻好像只有一個人住似的。

於是找找蘿莉控閒聊度日 ( 騷擾度日 )，妮可、天祥大神去上班、茵可研究所、學姊弄學碩專題，剩下能一起混的只有自己。這段時間突然講到關閉部落格這件事，蘿莉控很快就收到支持者的信件，信件中這麼描述「有時候我在某些問題上想法卡住時，都可以在您的部落格上找到非常有用的解答，你是一個厲害的 programmer，但是現在無法拜訪您的部落格，希望你邀請我。」當然這封信件是用英文寫的，畢竟在 google 搜尋上，我寫的內容並沒有排名很前面。

以前我也曾經 關閉部落格 過，但是沒有人來信或者說過什麼，其實關閉的理由很簡單，那時被某題卡了很久，找不到人討論、網路上也搜不到解答討論，一氣之下關了部落格。看起來是有點幼稚，那時心裡是這麼想的「寫這麼多想法和思路嘗試幫助別人，而自己想要幫助的時候卻找不到人。」現在的情況也是被題目描述的英文耍得團團轉，看著 google 翻譯猜來猜去，不知不覺就過一個早上、下午，甚至好幾天。

我很羨慕你們，明明有好的語言溝通理解，我卻只能不斷地用猜測來彌補。

《月刊少女野崎君》現在馬上放棄和刻苦鍛鍊一年後放棄

看著不斷低落的英文，一點也不想踏出任何一步。說再好的願景都沒用，最後還是不會接納我。人會前進是因為未知，但是在兩者選擇下，未知也能變成已知。

非常謝謝之後茵可、蘿莉控、學姊、妮可、潘神、 … 等小夥伴，我明白自己的問題很大，明白自己沒有機會學得比別人好時就會放棄，不懂時就急著要答案，遇到自己不會的領域直接舉白旗，永遠推不了的遊戲坑，非常難以調教的奴 … 等。

用 uhunt 的差集找 flere - morris821028 的題目來補，明明題數是兩倍之多，但是仍然有 60 題未解之題，開了數題來寫，也爬到 2570 AC。中間只有一句話可說「這麼這麼噁心的題目！」說來也是，很多題目是 World Final 或者是區賽的鬼題，聽蘿莉控講是集訓時被強迫寫的。心想也是，一直以來沒有被強迫寫過題目，「 單純只是不想輸，我還不想輸。 」

想到集訓方面，開學時候去交大旁聽課程，後來放棄去聽，其中最重要的原因是「 原來四年學習差這麼多了 」旁邊都是交大學生，有資格坐在同一間教室嗎我？

《四月是你的謊言》你應該明白吧，我能一直在你身旁給你提供的幫助並不是無限的

好啦，我能寫得就盡量寫，不會寫成文章的代碼就會丟 github，如果找不到去 github 挖一挖大概會有，從來沒有私藏過，如果找不到就是遇到失落的硬碟毀損那次－空白的五個月。如果有人來需要我的話，我會盡量放出來、寫下來， 直到我還沒有怨恨這個世界為止 。隨時將代碼放出來，防止自己哪天突然沒辦法做到這件事，同時加速這資訊流動，不再於抄不抄襲、花不花時間思考，的確給予解答是不好，因為沒有思想多樣性，想要幫助跟我一樣的人，另一個我說不定正在苦惱，說不定下一個我會看到這個我。

《甘城輝煌樂園救世主》沒有工資！

現在研究所推甄結果出來，成大交大資工所已經逕行錄取。「我啊，到底有什麼資格？沒有英文的我什麼都不是，沒有我的生存之地。」再見了，未來。

Read More +