In [2]:
import pandas as pd
from functools import reduce
In [3]:
# Read 2.txt, 3.txt and 4.txt from the working directory with pandas' default
# read_table settings. Presumably one table per stacking level (double, triple,
# quadruple) -- confirm against the files themselves.
data = list(map(pd.read_table, ('%d.txt' % level for level in range(2, 5))))
In [4]:
def merge_out(x, y, on='单'):
    """Outer-join two DataFrames on a shared key column.

    Parameters
    ----------
    x, y : pandas.DataFrame
        Left and right tables; both must contain the column named by ``on``.
    on : str, default '单'
        Name of the key column to join on. The default keeps the original
        two-argument call (e.g. from ``functools.reduce``) working unchanged.

    Returns
    -------
    pandas.DataFrame
        All keys from either table; columns coming from the table that lacks
        a given key are filled with NaN for that row.
    """
    return pd.merge(x, y, on=on, how='outer')
# Fold the list of tables left to right with merge_out, so every table in
# `data` ends up outer-joined into a single wide frame.
jointed=reduce(merge_out, data)
# 具有多种堆叠方式的字
# 具有二三四堆叠
In [5]:
# Keep only the rows in which every column has a value.
jointed[jointed.notna().all(axis=1)]
Out[5]:
# 具有二三四堆叠的单字
In [30]:
def single_word(table, column='单'):
    """Concatenate the distinct values of one column, using complete rows only.

    Rows with a missing value in *any* column of ``table`` are discarded
    first, so the result depends on which columns the caller passes in.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame containing ``column``; its values are expected to be strings.
    column : str, default '单'
        Column whose distinct values are joined. The default preserves the
        original single-argument behaviour.

    Returns
    -------
    str
        The distinct values in order of first appearance, joined without a
        separator; an empty string when no complete row remains.
    """
    complete_rows = table.dropna()
    return ''.join(complete_rows[column].drop_duplicates().tolist())
In [31]:
# Distinct '单' values taken from the rows of `jointed` that have no missing column.
single_word(jointed)
Out[31]:
# 金木水火土
- 金 鍂 鑫 𨰻
- 木 林 森 𣛧 𣡽
- 土 圭 垚 㙓
- 水 沝 淼 㵘
- 火 炏 焱 燚
# 只具有二三堆叠的单字
In [34]:
# Characters whose rows are complete across the '单', '双' and '三' columns ...
chars_2_3 = single_word(jointed[['单', '双', '三']])
S3 = set(chars_2_3)
# ... and characters whose rows are complete across every column of `jointed`.
S4 = set(single_word(jointed))
# Walk the ordered string rather than joining the set difference directly:
# iteration order of a set of str depends on per-process hash randomization,
# so ''.join(S3-S4) could show the same characters in a different order after
# each kernel restart. This form keeps the order of first appearance.
''.join(ch for ch in chars_2_3 if ch not in S4)
Out[34]:
# 双叠字
In [6]:
# Unique ('单', '双') pairs; rows missing either value are left out.
jointed.loc[:, ['单', '双']].dropna().drop_duplicates()
Out[6]:
# 三叠字
In [7]:
# Unique ('单', '三') pairs; rows missing either value are left out.
jointed.loc[:, ['单', '三']].dropna().drop_duplicates()
Out[7]:
# 四叠字
In [8]:
# Unique ('单', '四') pairs; rows missing either value are left out.
jointed.loc[:, ['单', '四']].dropna().drop_duplicates()
Out[8]:
Comments
comments powered by Disqus