文字列関連
文字列をつくる 各種操作・メソッド 正規表現
・文字列をつくる
・各種操作・メソッド
・正規表現
文字列をつくる 各種操作・メソッド 正規表現
・文字列をつくる
# Demo: the different ways to build a str. Each line prints the source form
# on the left of '->' and the resulting value on the right.
print('文字列をつくる ')
print("'strings'", '->', 'strings')
# The comma after the label is required: two adjacent literals would be
# concatenated and the output would lose the space before '->'.
print('"strings"', '->', "strings")
print("""'string"s"'""", '->', 'string"s"')
print('str(100)', '->', str(100))
print('"""strings"""', '->', """strings""")
print('======================================')

# Raw strings keep backslashes verbatim instead of starting an escape.
# The first argument of each line is the literal as typed in source code.
print('raw文字列:エスケープシーケンス無効化')
print('通常:', r'D:\\Jupy\\Django\\ai_lab', '\n->', 'D:\\Jupy\\Django\\ai_lab')
print('raw文字列:', r'D:\\Jupy\\Django\\ai_lab', '\n->', r'D:\\Jupy\\Django\\ai_lab')
print('======================================')

# Long literals, variant 1: adjacent literals inside parentheses are joined.
print('長い文字列の変数宣言 ')
print("s = ('11111111111'")
print(" '22222222222') ")
s = ('11111111111'
     '22222222222')
print(s)

# Variant 2: explicit line continuation with a trailing backslash.
# The backslash in the label is doubled so it is a valid escape sequence;
# the printed text is unchanged.
print("s = '11111111111'\\ ")
print(" '22222222222'")
s = '11111111111'\
    '22222222222'
print(s)
文字列をつくるに戻る
・各種操作・メソッド
各種操作
# Basic sequence operations on a str: indexing, slicing, copying, length
# and repetition. Each demo prints a caption, the expression, then the result.
_rule = '=================================='
s = "strings"
print('s = "strings"')

print('インデックス指定', 's[0], s[-1]', sep='\n')
_first, _last = s[0], s[-1]
print(_first, _last)
print(_rule)

print('スライス', 's[:3]', sep='\n')
print(s[:3])
print(_rule)

print('コピー', 's2 = s[:]', sep='\n')
s2 = s[:]
print(f's2 = {s2}')
print(_rule)

print('文字列長 ')
print(f'len(s) -> {len(s)}')
print(_rule)

print('乗算 ')
for _times in (0, 2):
    # Multiplying by 0 yields the empty string, so that line ends after '-> '.
    print(f"'strings' * {_times} ->", 'strings' * _times)
各種操作・メソッドに戻る
変形など
# String transformation methods: case changes, replace, the strip family,
# justification and zero-fill. Each demo prints a caption, the expression
# as text, then the value it evaluates to.
_rule = '=============================='

s = "strings strings"
print(f's = {s}')
for _caption, _shown, _result in (
    ('最初だけ大文字', 's.capitalize()', s.capitalize()),
    ('頭文字を大文字', 's.title()', s.title()),
    ('すべて大文字', 's.upper()', s.upper()),
    ('すべて小文字', 's.lower()', s.lower()),
    ('置換(数も指定可)', "s.replace('s', 'S', 2)", s.replace('s', 'S', 2)),
):
    print(_caption, _shown, _result, sep='\n')
    print(_rule)

# Results are wrapped in quotes so the remaining whitespace is visible.
print('端の空白や指定文字を除去 ')
s = ' strings '
print('s =', repr(s))
for _caption, _trimmed in (
    ('両端除去', s.strip()),
    ('左側除去', s.lstrip()),
    ('右側除去', s.rstrip()),
):
    print(_caption)
    print("'" + _trimmed + "'")
print(_rule)

print('先頭または末尾の文字を除去', '(先頭、末尾から連続)',
      "'abaacdefgg'.strip('abdfg')", sep='\n')
print('->', 'abaacdefgg'.strip('abdfg'))
print(_rule)

print('文字寄せ ')
s = 'strings'
print('s =', repr(s))
for _caption, _shown, _padded in (
    ('右寄せ', 's.rjust(12)', s.rjust(12)),
    ('中央寄せ', 's.center(12)', s.center(12)),
    ('左寄せ', 's.ljust(12)', s.ljust(12)),
):
    print(_caption, _shown, "'" + _padded + "'", sep='\n')
print(_rule)

print('数字文字列をゼロ埋め ')
s = "100"
print(f's = {s}')
print('s.zfill(10)')
print("'" + s.zfill(10) + "'")
各種操作・メソッドに戻る
リスト化・結合
# Converting between strings and lists: list(), split(), splitlines(), join().
_rule = '===================================='

print('文字列のリスト化')
s = 'strings'
print('s =', repr(s))  # display the sample as a quoted literal
print('list(s)')
_chars = list(s)
print(_chars)
print(_rule)

print('デリミタで区切った値のリストを返す')
s = 's,t,r,i,n,g,s'
print('s =', repr(s))
print("s.split(',')")
_fields = s.split(',')
print(_fields)
print(_rule)

print('複数行の文字列を行分割 ')
_multiline = 'a\nb\nc'
print(r"'a\nb\nc'.splitlines()")
print(_multiline.splitlines())
print(_rule)

print('文字列リストを結合')
_parts = ['stri', 'ngs']
print("''.join(['stri', 'ngs'])")
print(''.join(_parts))
各種操作・メソッドに戻る
カウント・検索など
# Counting, searching and predicate methods of str. Each demo prints a
# caption, the expression as text, then the value it evaluates to.
_rule = '============================================'

s = 'strings'
print('s =', repr(s))  # display the sample as a quoted literal
print('指定文字の出現回数 ')
print("s.count('s')")
print(s.count('s'))
print(_rule)
print('指定文字列で始まるかどうか?')
print("s.startswith('s')")
print(s.startswith('s'))
print(_rule)
print('指定文字列で終わるかどうか?')
print("s.endswith('s')")
print(s.endswith('s'))
print(_rule)
# find/rfind return -1 when nothing matches ...
print('マッチしたindexを返す(なしは-1)')
print("s.find('s')")
print(s.find('s'))
print(_rule)
print('後方から')
print("s.rfind('s')")
print(s.rfind('s'))
print(_rule)
# ... whereas index raises ValueError (caption previously misspelled it).
print('マッチしたindexを返す(なしはValueError)')
print("s.index('s')")
print(s.index('s'))
print(_rule)
print('すべての文字がアルファベットならTrue ')
print("s.isalpha()")
print(s.isalpha())
print(_rule)
print('すべての文字が数字ならTrue')
s = '100'
print('s =', repr(s))
print("s.isnumeric()")
print(s.isnumeric())
print(_rule)
# Still evaluated against '100' from the previous demo.
print('すべての文字がアルファベットか数字ならTrue ')
print("s.isalnum()")
print(s.isalnum())
各種操作・メソッドに戻る
文字列フォーマット
# String formatting demos: str.format, f-strings and %-formatting.
# Each demo prints a caption, the expression as text, then its result.
_rule = '================================================='

print('format文字列')
print('表示幅桁数指定')
print("'{:10}'.format(12345)")
# Wrapped in quotes so the padding produced by the width is visible.
print(f"'{'{:10}'.format(12345)}'")
print(_rule)
# Caption spelling fixed: 小数点 (decimal point), matching the %-format caption.
print('小数点以下桁数指定')
print("'{:.3}'.format(0.12345)")
print('{:.3}'.format(0.12345))
print(_rule)
print('辞書渡す(数値は引数index)')
print("'{0[a]}-{1[b]}'.format({'a': 123}, {'b': 4567})")
print('{0[a]}-{1[b]}'.format({'a': 123}, {'b': 4567}))
print(_rule)
print('カンマ区切り')
print("'{:,}'.format(10000)")
print('{:,}'.format(10000))
print(_rule)
print('パーセント')
print("'{0:.1%}'.format(1/3)")
print('{0:.1%}'.format(1/3))
print(_rule)
print('ゼロパディング')
print("'{:03d}'.format(1)")
print('{:03d}'.format(1))
print(_rule)
print('文字寄せ+文字埋め')
print('右寄せ+*埋め')
print("'{:*>15s}'.format('strings')")
print('{:*>15s}'.format('strings'))
print('中央寄せ+-埋め')
print("'{:-^15s}'.format('strings')")
print('{:-^15s}'.format('strings'))
print(_rule)
print('f-strings(幅、小数点以下桁数指定可)')
n = 0.12345
print('n =', n)
print("f'{n:06.3}'")
print(f'{n:06.3}')
print(_rule)
# %-formatting is kept on purpose: it is the feature being demonstrated.
print('%(型、幅、小数点以下桁数指定可)')
print("'%s %04d %.1f' % ('strings', 1, 3.14)")
print('%s %04d %.1f' % ('strings', 1, 3.14))
print("'%(a)s %(b)06.2f' % {'a':'str', 'b':3.3333}")
print('%(a)s %(b)06.2f' % {'a':'str', 'b':3.3333})
各種操作・メソッドに戻る
・正規表現
基本例など
正規表現 | マッチする文字列 |
---|---|
. | あらゆる文字1文字 |
* | 0回以上の繰り返し |
+ | 1回以上の繰り返し |
? | 0回または1回の繰り返し |
{m} | m回の繰り返し |
{m,n} | m回以上n回までの繰り返し |
^ | 文字列の先頭にマッチ |
$ | 文字列の末尾にマッチ |
\ | エスケープ |
[] | 文字集合のどれか |
[^] | 文字集合以外 |
| | または |
() | キャプチャ(正確) |
(?:) | キャプチャしない |
特殊文字(メタ文字) | |
\d | 1文字の数字(=[0-9]) |
\D | 数字以外 |
\s | 空白文字 |
\S | 空白文字以外 |
\w | 英数字_ |
\W | 英数字_以外 |
*
# Demo of `*`: zero or more repetitions of the preceding token.
print('import re')
import re

s = 'strings abcdefg'
print('s =', repr(s))  # display the sample as a quoted literal
r = r's\w*'
# Echo the pattern through an f-string: a backslash inside a non-raw
# literal ("\w") is an invalid escape sequence and triggers a warning.
print(f"r = r'{r}'")
print('re.findall(r, s)')
print('->', re.findall(r, s))
正規表現に戻る
+
# Demo of `+`: one or more repetitions of the preceding token.
print('import re')
import re

s = 'strings abcdefg'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\w+'
# Echo the pattern through an f-string so no "\w" sits in a non-raw
# literal (an invalid escape sequence).
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
?
# Demo of `?`: the preceding token is optional (zero or one occurrence).
print('import re')
import re

# Match against the bare sample text; only the echo shows it quoted.
# Previously `s` kept the surrounding quote characters, unlike the other demos.
s = 'http https htp'
print('s =', repr(s))
r = r'https?'
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
{m}
# Demo of `{m}`: exactly m repetitions of the preceding token.
print('import re')
import re

s = 'id:12345, id:23456, id:789'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\d{5}'
# Echo via f-string: "\d" in a non-raw literal is an invalid escape sequence.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
{m,n}, \d, \D
# Demo of `{m,n}` with \d / \D: runs of 3 to 5 digits that have a
# non-digit on both sides (so the 6-digit number is not matched).
print('import re')
import re

s = ' 100, 1000, 10000, 100000 '
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\D(\d{3,5})\D'
# Echo via f-string: "\D" / "\d" in a non-raw literal are invalid escapes.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
^
# Demo of `^`: anchor the match at the start of the string.
print('import re')
import re

s = 'strings abcdefg'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'^\w+'
# Echo via f-string: "\w" in a non-raw literal is an invalid escape sequence.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
$
# Demo of `$`: anchor the match at the end of the string.
print('import re')
import re

s = 'strings abcdefg'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\w+$'
# Echo via f-string: "\w" in a non-raw literal is an invalid escape sequence.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
\(エスケープ)
# Demo of `\`: escape a metacharacter (here `$`) to match it literally.
print('import re')
import re

s = '$100 $200 300'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\$\d+'
# Echo via f-string: "\$" / "\d" in a non-raw literal are invalid escapes.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
[], [^]
# Demo of `[...]` (any one of the listed characters) and `[^...]`
# (any one character NOT listed).
print('import re')
import re

# Match against the bare sample text; only the echo shows it quoted.
# Previously `s` kept the surrounding quote characters, unlike the other demos.
s = 'hit bit pit kit'
print('s =', repr(s))
r = r'[hb]it'
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
print('======================')
r = r'[^hb]it'
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
|(または)
# Demo of `|`: alternation, match either the left or the right branch.
print('import re')
import re

# Match against the bare sample text; only the echo shows it quoted.
# Previously `s` kept the surrounding quote characters, unlike the other demos.
s = 'hit bit pit kit'
print('s =', repr(s))
r = r'hit|bit'
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
()キャプチャ
# Demo of `(...)`: a capturing group. With one group in the pattern,
# findall returns the captured text rather than the whole match.
print('import re')
import re

s = 'id:12345, id:23456'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'id:(\d+)'
# Echo via f-string: "\d" in a non-raw literal is an invalid escape sequence.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
(?:)キャプチャしない
# Demo of `(?:...)`: a group that is used for alternation only and is not
# captured, so findall returns just the `(\d+)` group.
print('import re')
import re

# Match against the bare sample text; only the echo shows it quoted.
# Previously `s` kept the surrounding quote characters, unlike the other demos.
s = 'id:12345, pw:23456'
print('s =', repr(s))
r = r'(?:id:|pw:)(\d+)'
# Echo via f-string: "\d" in a non-raw literal is an invalid escape sequence.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
\s, \S
# Demo of \s (whitespace) and \S (non-whitespace): capture the word that
# follows a whitespace character.
print('import re')
import re

s = 'abcd efgh '
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\s(\S+)'
# Echo via f-string: "\s" / "\S" in a non-raw literal are invalid escapes.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
\w, \W
# Demo of \w (word character) and \W (non-word character): capture each
# word run that directly follows a non-word character.
print('import re')
import re

s = '12345abcde(12345),abc,a_b_c'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\W(\w+)'
# Echo via f-string: "\W" / "\w" in a non-raw literal are invalid escapes.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
パターン例など
引数オプション
subメソッド
時刻
電話番号
URL(簡易版)
メールアドレス(簡易版)
引数オプション(re.I, re.M, re.DOTALL)
print('import re')
import re
s = "'Strings'"
print('s =', s)
print("r = r's\w+'")
r = r's\w+'
print("re.findall(r, s)")
print('->', re.findall(r, s))
print('re.I: 大文字小文字を区別なし')
print("re.findall(r, s, re.I)")
print('->', re.findall(r, s, re.I))
print('============================================')
s = r"'abcde\nfghij\nklmno'"
print('s =', s)
s = 'abcde\nfghij\nklmno'
print("r = r'^\w+'")
r = r'^\w+'
print("re.findall(r, s)")
print('->', re.findall(r, s))
print('============================================')
print('re.M: ^が行頭にもマッチ,$が行末にもマッチ')
print("re.findall(r, s, re.M)")
print('->', re.findall(r, s, re.M))
print('============================================')
print("r = r'.*'")
r = r'.*'
print("re.findall(r, s)")
print('->', re.findall(r, s))
print('============================================')
print('re.DOTALL: .が改行にもマッチ')
print("re.findall(r, s, re.DOTALL)")
print('->', re.findall(r, s, re.DOTALL))
正規表現に戻る
subメソッド
# Demo of re.sub: mask every run of digits (the password) with asterisks.
print('パスワードを*に置換')
print('import re')
import re

# Substitute on the bare sample text. Previously `s` kept the surrounding
# quote characters; repr() is used for display so the output is unchanged.
s = 'name: Animal, password: 123456'
print('s =', repr(s))
r = r'\d+'
# Echo via f-string: "\d" in a non-raw literal is an invalid escape sequence.
print(f"r = r'{r}'")
print("re.sub(r, '********', s)")
print('->', repr(re.sub(r, '********', s)))
正規表現に戻る
時刻
# Pattern example: clock times with a one- or two-digit hour.
print('import re')
import re

s = '12:00, 01:00, 2:00'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\d?\d:\d{2}'
# Both echoes avoid "\d" in a non-raw literal (an invalid escape sequence).
print(f"r = r'{r}'")
print(r"or r'\d{1,2}:\d{2}'")  # equivalent spelling, shown for reference only
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
電話番号
# Pattern example: phone numbers shaped as 2-3 digits, 3 digits, 4 digits.
print('import re')
import re

s = '123-456-7890, 12-345-6789'
print('s =', repr(s))  # display the sample as a quoted literal
r = r'\d{2,3}-\d{3}-\d{4}'
# Echo via f-string: "\d" in a non-raw literal is an invalid escape sequence.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
URL(簡易版)
# Pattern example (simplified): URLs terminated by a space, comma or ')'.
# The non-greedy `.+?` stops at the first terminator instead of the last.
print('import re')
import re

s = 'https://animal-ai-lab.com, (http://animal-ai-lab.com) https://animal-ai-lab.com '
print('s =', repr(s))  # display the sample as a quoted literal
print('+?は非貪欲')
r = r'(https?.+?)[ ,)]'
print(f"r = r'{r}'")
_urls = re.findall(r, s)
print("re.findall(r, s)")
print('->', _urls)
正規表現に戻る
メールアドレス(簡易版)
# Pattern example (simplified): e-mail addresses made of word characters,
# dots and hyphens on both sides of '@'.
print('import re')
import re

s = 'abc@def.ghi.ne.jp, a_b-c@d_e-f.ghi.ne.jp '
print('s =', repr(s))  # display the sample as a quoted literal
r = r'[\w.-]+@[\w.-]+'
# Echo via f-string: "\w" in a non-raw literal is an invalid escape sequence.
print(f"r = r'{r}'")
print("re.findall(r, s)")
print('->', re.findall(r, s))
正規表現に戻る
Unicode関連
Unicode一覧(外部リンク):https://ja.wikipedia.org/wiki/Unicode%E4%B8%80%E8%A6%A7%E8%A1%A8
print('import re')
import re
print('import regex')
import regex
# Match character classes by Unicode code-point range with the `re` module.
# For every class the pattern is echoed twice: once with the \uXXXX escapes
# as typed, once with the escapes rendered as the actual characters.
s = 'あいうえおアイウエオabcdeABCDE12345一二三四五'
print('s =', repr(s))  # display the sample as a quoted literal
_rule = '======================================================'
_classes = (
    # (caption, pattern handed to re, same pattern with escapes rendered)
    ('ひらがな', r'[\u3041-\u309F]+', '[\u3041-\u309F]+'),
    ('カタカナ', r'[\u30A1-\u30FF]+', '[\u30A1-\u30FF]+'),
    ('英字', r'[\u0041-\u005A]+|[\u0061-\u007A]+', '[\u0041-\u005A]+|[\u0061-\u007A]+'),
    ('数字', r'[\u0030-\u0039]+', '[\u0030-\u0039]+'),
)
for _caption, r, _rendered in _classes:
    print(_caption)
    print(f"r = r'{r}'")
    print(f"r = r'{_rendered}'")
    print("re.findall(r, s)")
    print('->', re.findall(r, s))
    print(_rule)
# Kanji: the \p{Script=Han} property class is matched with the third-party
# `regex` module, against the sample `s` defined earlier in this cell.
print('漢字')
r = r'\p{Script=Han}+'
# The pattern is echoed twice to keep the same layout as the other character
# classes; \p has no rendered form, so both lines are identical. Both go
# through an f-string because "\p" in a non-raw literal is an invalid escape.
print(f"r = r'{r}'")
print(f"r = r'{r}'")
print("regex.findall(r, s)")
print('->', regex.findall(r, s))
print('======================================================')
# Unicode normalization demo using the standard unicodedata module.
print('正規化(unicodedataモジュール)', 'import unicodedata', sep='\n')
import unicodedata

# NOTE(review): `s` is the quoted display form and the sample is plain ASCII,
# so NFKC returns it unchanged — presumably the original sample was
# full-width text; confirm against the source article.
s = repr('strings')
print(f's = {s}')
print("unicodedata.normalize('NFKC', s)")
_normalized = unicodedata.normalize('NFKC', s)
print('->', _normalized)
正規表現に戻る