# Candidate character sets to test.  This is only the start of the list of
# charsets understood by Python 3.13; the rest is omitted for brevity here.
# NOTE: the trailing ``...`` is a literal Ellipsis placeholder, not a codec
# name -- replace it with the remaining codec names before running.
charsets = ['ascii', 'big5', 'big5hkscs', 'cp037', ...]

# Read the CSV file as raw bytes: binary mode means nothing is decoded yet,
# so each candidate charset can be tried against the original bytes later.
with open('registration.csv', 'rb') as f:
    d = f.read()

def try_read(charset, i, data=None):
    """Decode the single byte at offset ``i`` using ``charset``.

    Args:
        charset: Name of the codec to try, e.g. ``'cp1252'``.
        i: Zero-based byte offset of the character to decode.
        data: Bytes to read from.  Defaults to the module-level ``d``
            (the raw contents of the CSV file).

    Returns:
        The decoded text, or ``None`` if the byte is not valid in
        ``charset`` or ``charset`` is not a text codec that
        ``bytes.decode`` can look up.  An offset past the end of the
        data yields ``''``.

    Only one byte is decoded, so this can only identify characters that
    ``charset`` encodes as a single byte.
    """
    if data is None:
        # Fall back to the file contents loaded at module level.
        data = d
    try:
        return data[i:i + 1].decode(charset)
    except (UnicodeDecodeError, LookupError):
        # LookupError: unknown, platform-specific or non-text codec.  Such a
        # codec cannot be the answer, so treat it like a failed decode rather
        # than aborting the whole scan.
        return None

# Byte positions of characters that have caused problems, each paired with
# the character it should decode to.
strings = [
    (4071, 'å'),
    (6270, 'š'),
]

# Work through each of the problems and narrow down the list of charsets.
# For every known problem character, keep only the charsets that decode the
# byte at that position to the expected character, then print what survives.
sets = charsets[:]  # work on a copy so ``charsets`` itself is left untouched
for i, expected in strings:
    # Named ``expected`` rather than ``str`` so the builtin is not shadowed.
    sets = [s for s in sets if try_read(s, i) == expected]
    print(i, expected, sets)