/android/import-contacts

To get this branch, use:
bzr branch http://bzr.ed.am/android/import-contacts
6 by edam
- added GPL header comments to all files
1
/*
 * VCFImporter.java
 *
 * Copyright (C) 2009 Tim Marston <edam@waxworlds.org>
 *
 * This file is part of the Import Contacts program (hereafter referred
 * to as "this program"). For more information, see
 * http://www.waxworlds.org/edam/software/android/import-contacts
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
23
14 by edam
- got rid of the pretend ImportContacts activity alltogether (and made the Intro activity the startup one)
24
package org.waxworlds.edam.importcontacts;
1 by edam
Initial import
25
26
import java.io.BufferedReader;
27
import java.io.File;
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
28
import java.io.FileInputStream;
1 by edam
Initial import
29
import java.io.FileNotFoundException;
30
import java.io.FileReader;
31
import java.io.FilenameFilter;
32
import java.io.IOException;
33
import java.io.UnsupportedEncodingException;
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
34
import java.nio.ByteBuffer;
37 by edam
- updated TODO and NEWS
35
import java.util.ArrayList;
1 by edam
Initial import
36
import java.util.Arrays;
37
import java.util.HashSet;
36 by edam
- formatting: removed some double-indents on overrunning lines
38
import java.util.Iterator;
1 by edam
Initial import
39
import java.util.List;
37 by edam
- updated TODO and NEWS
40
import java.util.NoSuchElementException;
1 by edam
Initial import
41
import java.util.Set;
42
import java.util.Vector;
43
import java.util.regex.Matcher;
44
import java.util.regex.Pattern;
45
46
import android.content.SharedPreferences;
47
import android.provider.Contacts;
48
import android.provider.Contacts.PhonesColumns;
49
50
public class VCFImporter extends Importer
51
{
52
	private int _vCardCount = 0;
53
	private int _progress = 0;
54
55
	/**
	 * Creates a vCard importer.
	 *
	 * @param doit handed straight through to the Importer superclass
	 */
	public VCFImporter( Doit doit )
	{
		super( doit );
	}
59
60
	@Override
61
	protected void onImport() throws AbortImportException
62
	{
63
		SharedPreferences prefs = getSharedPreferences();
64
65
		// update UI
66
		setProgressMessage( R.string.doit_scanning );
67
68
		// get a list of vcf files
69
		File[] files = null;
70
		try
71
		{
72
			// open directory
19 by edam
- added file chooser
73
			String path = "/sdcard" + prefs.getString( "location", "/" );
74
			File file = new File( path );
75
			if( !file.exists() )
1 by edam
Initial import
76
				showError( R.string.error_locationnotfound );
77
15 by edam
- added facility to enter a filename (instead of a directory to scan) and just use that
78
			// directory, or file?
19 by edam
- added file chooser
79
			if( file.isDirectory() )
15 by edam
- added facility to enter a filename (instead of a directory to scan) and just use that
80
			{
81
				// get files
82
				class VCardFilter implements FilenameFilter {
83
					public boolean accept( File dir, String name ) {
84
						return name.toLowerCase().endsWith( ".vcf" );
85
					}
13 by edam
- converted project to use Android 1.5 SDK
86
				}
19 by edam
- added file chooser
87
				files = file.listFiles( new VCardFilter() );
15 by edam
- added facility to enter a filename (instead of a directory to scan) and just use that
88
			}
89
			else
90
			{
91
				// use just this file
92
				files = new File[ 1 ];
19 by edam
- added file chooser
93
				files[ 0 ] = file;
15 by edam
- added facility to enter a filename (instead of a directory to scan) and just use that
94
			}
1 by edam
Initial import
95
		}
96
		catch( SecurityException e ) {
97
			showError( R.string.error_locationpermissions );
98
		}
99
100
		// check num files and set progress max
101
		if( files != null && files.length > 0 )
102
			setProgressMax( files.length );
103
		else
104
			showError( R.string.error_locationnofiles );
105
106
		// scan through the files
107
		setTmpProgress( 0 );
108
		for( int i = 0; i < files.length; i++ ) {
109
			countVCardFile( files[ i ] );
110
			setTmpProgress( i );
111
		}
112
		setProgressMax( _vCardCount );	// will also update tmp progress
113
114
		// import them
115
		setProgress( 0 );
116
		for( int i = 0; i < files.length; i++ )
117
			importVCardFile( files[ i ] );
118
	}
119
120
	private void countVCardFile( File file ) throws AbortImportException
121
	{
122
		try
123
		{
124
			// open file
125
			BufferedReader reader = new BufferedReader(
36 by edam
- formatting: removed some double-indents on overrunning lines
126
				new FileReader( file ) );
1 by edam
Initial import
127
128
			// read
129
			String line;
130
			boolean inVCard = false;
131
			while( ( line = reader.readLine() ) != null )
132
			{
133
				if( !inVCard ) {
134
					// look for vcard beginning
36 by edam
- formatting: removed some double-indents on overrunning lines
135
					if( line.matches( "^BEGIN:VCARD" ) ) {
1 by edam
Initial import
136
						inVCard = true;
137
						_vCardCount++;
138
					}
139
				}
36 by edam
- formatting: removed some double-indents on overrunning lines
140
				else if( line.matches( "^END:VCARD" ) )
1 by edam
Initial import
141
					inVCard = false;
142
			}
143
144
		}
145
		catch( FileNotFoundException e ) {
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
146
			showError( getText( R.string.error_filenotfound ) +
147
				file.getName() );
1 by edam
Initial import
148
		}
149
		catch( IOException e ) {
150
			showError( getText( R.string.error_ioerror ) + file.getName() );
151
		}
152
	}
153
154
	private void importVCardFile( File file ) throws AbortImportException
155
	{
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
156
		// check file is good
157
		if( !file.exists() )
158
			showError( getText( R.string.error_filenotfound ) +
159
				file.getName() );
160
		if( file.length() == 0 )
161
			showError( getText( R.string.error_fileisempty ) +
162
				file.getName() );
163
1 by edam
Initial import
164
		try
165
		{
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
166
			// open/read file
167
			FileInputStream istream = new FileInputStream( file );
168
			byte[] content = new byte[ (int)file.length() ];
169
			istream.read( content );
170
171
			// import
172
			importVCardFileContent( content, file.getName() );
1 by edam
Initial import
173
		}
174
		catch( FileNotFoundException e ) {
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
175
			showError( getText( R.string.error_filenotfound ) +
176
				file.getName() );
1 by edam
Initial import
177
		}
178
		catch( IOException e ) {
179
			showError( getText( R.string.error_ioerror ) + file.getName() );
180
		}
181
	}
182
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
183
	private void importVCardFileContent( byte[] content, String fileName )
36 by edam
- formatting: removed some double-indents on overrunning lines
184
		throws AbortImportException
1 by edam
Initial import
185
	{
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
186
		// go through lines
1 by edam
Initial import
187
		VCard vCard = null;
36 by edam
- formatting: removed some double-indents on overrunning lines
188
		ContentLineIterator cli = new ContentLineIterator( content );
189
		while( cli.hasNext() )
1 by edam
Initial import
190
		{
36 by edam
- formatting: removed some double-indents on overrunning lines
191
			ByteBuffer buffer = cli.next();
192
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
193
			// get a US-ASCII version of the line for processing
194
			String line;
195
			try {
36 by edam
- formatting: removed some double-indents on overrunning lines
196
				line = new String( buffer.array(), buffer.position(),
197
					buffer.limit() - buffer.position(), "US-ASCII" );
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
198
			}
199
			catch( UnsupportedEncodingException e ) {
200
				// we know US-ASCII is supported, so appease the compiler...
201
				line = "";
202
			}
1 by edam
Initial import
203
204
			if( vCard == null ) {
205
				// look for vcard beginning
36 by edam
- formatting: removed some double-indents on overrunning lines
206
				if( line.matches( "^BEGIN:VCARD" ) ) {
1 by edam
Initial import
207
					setProgress( ++_progress );
208
					vCard = new VCard();
209
				}
210
			}
211
			else {
212
				// look for vcard content or ending
36 by edam
- formatting: removed some double-indents on overrunning lines
213
				if( line.matches( "^END:VCARD" ) )
1 by edam
Initial import
214
				{
215
					// store vcard and do away with it
216
					try {
217
						vCard.finaliseParsing();
218
						importContact( vCard );
219
					}
220
					catch( VCard.ParseException e ) {
221
						skipContact();
222
						if( !showContinue(
36 by edam
- formatting: removed some double-indents on overrunning lines
223
							getText( R.string.error_vcf_parse ).toString()
224
							+ fileName + "\n" + e.getMessage() ) )
225
						{
3 by edam
- added "all done" message
226
							finish( ACTION_ABORT );
36 by edam
- formatting: removed some double-indents on overrunning lines
227
						}
1 by edam
Initial import
228
					}
229
					catch( VCard.SkipContactException e ) {
230
						skipContact();
231
						// do nothing
232
					}
233
					vCard = null;
234
				}
235
				else
236
				{
237
					// try giving the line to the vcard
238
					try {
36 by edam
- formatting: removed some double-indents on overrunning lines
239
						vCard.parseLine( buffer, line,
240
							cli.doesNextLineLookFolded() );
1 by edam
Initial import
241
					}
242
					catch( VCard.ParseException e ) {
243
						skipContact();
244
						if( !showContinue(
36 by edam
- formatting: removed some double-indents on overrunning lines
245
							getText( R.string.error_vcf_parse ).toString()
246
							+ fileName + "\n" + e.getMessage() ) )
247
						{
3 by edam
- added "all done" message
248
							finish( ACTION_ABORT );
36 by edam
- formatting: removed some double-indents on overrunning lines
249
						}
1 by edam
Initial import
250
251
						// although we're continuing, we still need to abort
252
						// this vCard. Further lines will be ignored until we
253
						// get to another BEGIN:VCARD line.
254
						vCard = null;
255
					}
256
					catch( VCard.SkipContactException e ) {
257
						skipContact();
258
						// abort this vCard. Further lines will be ignored until
259
						// we get to another BEGIN:VCARD line.
260
						vCard = null;
261
					}
262
				}
263
			}
264
		}
265
	}
266
36 by edam
- formatting: removed some double-indents on overrunning lines
267
	/**
	 * Iterates over the lines of a byte array without copying it. Each line
	 * is returned as a ByteBuffer that wraps the original array, positioned
	 * on the line's first byte and limited just before its terminator (a
	 * "\n", or a "\r\n" pair). A final line without a terminator is returned
	 * as-is.
	 */
	class ContentLineIterator implements Iterator< ByteBuffer >
	{
		protected byte[] _content = null;
		// index of the first byte of the next line to be returned
		protected int _pos = 0;

		/**
		 * @param content the bytes to iterate over (not copied)
		 */
		public ContentLineIterator( byte[] content )
		{
			_content = content;
		}

		@Override
		public boolean hasNext()
		{
			return _pos < _content.length;
		}

		/**
		 * Returns the next line, excluding its line terminator.
		 *
		 * @throws NoSuchElementException if there are no bytes left
		 */
		@Override
		public ByteBuffer next()
		{
			int start = _pos;

			// no bytes left
			if( start >= _content.length )
				throw new NoSuchElementException();

			// find newline (or the end of the content)
			int end = start;
			while( end < _content.length && _content[ end ] != '\n' )
				end++;

			// the next line starts after the newline; when there was none
			// this moves past the end of the content, which ends iteration
			_pos = end + 1;

			// adjust for a \r preceding the \n, provided that the \r
			// belongs to this line
			if( end < _content.length && end > start &&
				_content[ end - 1 ] == '\r' )
			{
				end--;
			}

			return ByteBuffer.wrap( _content, start, end - start );
		}

		@Override
		public void remove()
		{
			throw new UnsupportedOperationException();
		}

		/**
		 * Does the next line, if there is one, look like it should be folded
		 * onto the end of this one?
		 * @return true if the line just returned ended in a newline and the
		 *         following line starts with a space
		 */
		public boolean doesNextLineLookFolded()
		{
			return _pos > 0 && _pos < _content.length &&
				_content[ _pos - 1 ] == '\n' && _content[ _pos ] == ' ';
		}
	}
329
1 by edam
Initial import
330
	private class VCard extends ContactData
331
	{
332
		private final static int NAMELEVEL_NONE = 0;
333
		private final static int NAMELEVEL_ORG = 1;
334
		private final static int NAMELEVEL_FN = 2;
335
		private final static int NAMELEVEL_N = 3;
336
37 by edam
- updated TODO and NEWS
337
		private final static int MULTILINE_NONE = 0;
338
		private final static int MULTILINE_ENCODED = 1;	// v2.1 quoted-printable
339
		private final static int MULTILINE_ESCAPED = 2;	// v2.1 \\CRLF
340
		private final static int MULTILINE_FOLDED = 3;	// v3.0 folding
341
1 by edam
Initial import
342
		private String _version = null;
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
343
		private Vector< ByteBuffer > _buffers = null;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
344
		private int _name_level = NAMELEVEL_NONE;
37 by edam
- updated TODO and NEWS
345
		private int _parser_multiline_state = MULTILINE_NONE;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
346
		private String _parser_current_name_and_params = null;
347
		private String _parser_buffered_value_so_far = "";
348
349
		protected class UnencodeResult
350
		{
351
			private boolean _another_line_required;
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
352
			private ByteBuffer _buffer;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
353
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
354
			public UnencodeResult( boolean another_line_required,
355
				ByteBuffer buffer )
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
356
			{
357
				_another_line_required = another_line_required;
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
358
				_buffer = buffer;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
359
			}
360
361
			public boolean isAnotherLineRequired()
362
			{
363
				return _another_line_required;
364
			}
365
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
366
			public ByteBuffer getBuffer()
367
			{
368
				return _buffer;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
369
			}
370
		}
1 by edam
Initial import
371
14 by edam
- got rid of the pretend ImportContacts activity alltogether (and made the Intro activity the startup one)
372
		@SuppressWarnings("serial")
1 by edam
Initial import
373
		protected class ParseException extends Exception
374
		{
14 by edam
- got rid of the pretend ImportContacts activity alltogether (and made the Intro activity the startup one)
375
			@SuppressWarnings("unused")
1 by edam
Initial import
376
			public ParseException( String error )
377
			{
378
				super( error );
379
			}
380
381
			public ParseException( int res )
382
			{
383
				super( VCFImporter.this.getText( res ).toString() );
384
			}
385
		}
386
14 by edam
- got rid of the pretend ImportContacts activity alltogether (and made the Intro activity the startup one)
387
		@SuppressWarnings("serial")
1 by edam
Initial import
388
		protected class SkipContactException extends Exception { }
389
36 by edam
- formatting: removed some double-indents on overrunning lines
390
		private String extractCollonPartFromLine( ByteBuffer buffer,
391
			String line, boolean former )
392
		{
393
			String ret = null;
394
395
			// get a US-ASCII version of the line for processing, unless we were
396
			// supplied with one
397
			if( line == null ) {
398
				try {
399
					line = new String( buffer.array(), buffer.position(),
400
						buffer.limit() - buffer.position(), "US-ASCII" );
401
				}
402
				catch( UnsupportedEncodingException e ) {
403
					// we know US-ASCII is supported, so appease the compiler...
404
					line = "";
405
				}
406
			}
407
408
			// split line into name and value parts and check to make sure we
409
			// only got 2 parts and that the first part is not zero in length
410
			String[] parts = line.split( ":", 2 );
411
			if( parts.length == 2 && parts[ 0 ].length() > 0 )
412
				ret = parts[ former? 0 : 1 ];
413
414
			return ret;
415
		}
416
417
		private String extractNameAndParamsFromLine( ByteBuffer buffer,
418
			String line )
419
		{
420
			return extractCollonPartFromLine( buffer, line, true );
421
		}
422
423
		private String extractValueFromLine( ByteBuffer buffer, String line )
424
		{
425
			return extractCollonPartFromLine( buffer, line, false );
426
		}
427
428
		public void parseLine( ByteBuffer buffer, String line,
429
			boolean next_line_looks_folded )
430
			throws ParseException, SkipContactException,
431
			AbortImportException
432
		{
433
			// do we have a version yet?
1 by edam
Initial import
434
			if( _version == null )
435
			{
36 by edam
- formatting: removed some double-indents on overrunning lines
436
				// tentatively get name and params from line
437
				String name_and_params =
438
					extractNameAndParamsFromLine( buffer, line );
439
440
				// is it a version line?
441
				if( name_and_params != null &&
442
					name_and_params.equals( "VERSION" ) )
1 by edam
Initial import
443
				{
36 by edam
- formatting: removed some double-indents on overrunning lines
444
					// yes, get it!
445
					String value = extractValueFromLine( buffer, line );
446
					if( !value.equals( "2.1" ) && !value.equals( "3.0" ) )
1 by edam
Initial import
447
						throw new ParseException( R.string.error_vcf_version );
36 by edam
- formatting: removed some double-indents on overrunning lines
448
					_version = value;
1 by edam
Initial import
449
36 by edam
- formatting: removed some double-indents on overrunning lines
450
					// parse any buffers we've been accumulating while we waited
451
					// for a version
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
452
					if( _buffers != null )
453
						for( int i = 0; i < _buffers.size(); i++ )
36 by edam
- formatting: removed some double-indents on overrunning lines
454
							parseLine( _buffers.get( i ), null,
455
								i + 1 < _buffers.size() &&
456
								_buffers.get( i + 1 ).hasRemaining() &&
457
								_buffers.get( i + 1 ).get(
458
									_buffers.get( i + 1 ).position() ) == ' ' );
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
459
					_buffers = null;
1 by edam
Initial import
460
				}
461
				else
462
				{
36 by edam
- formatting: removed some double-indents on overrunning lines
463
					// no, so stash this line till we get a version
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
464
					if( _buffers == null )
465
						_buffers = new Vector< ByteBuffer >();
466
					_buffers.add( buffer );
1 by edam
Initial import
467
				}
468
			}
469
			else
470
			{
36 by edam
- formatting: removed some double-indents on overrunning lines
471
				// name and params and the position in the buffer where the
472
				// "value" part of the line start
473
				String name_and_params;
474
				int pos;
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
475
37 by edam
- updated TODO and NEWS
476
				if( _parser_multiline_state != MULTILINE_NONE )
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
477
				{
478
					// if we're currently in a multi-line value, use the stored
479
					// property name and parameters
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
480
					name_and_params = _parser_current_name_and_params;
481
37 by edam
- updated TODO and NEWS
482
					// skip some initial line characters, depending on the type
483
					// of multi-line we're handling
36 by edam
- formatting: removed some double-indents on overrunning lines
484
					pos = buffer.position();
37 by edam
- updated TODO and NEWS
485
					switch( _parser_multiline_state )
486
					{
487
					case MULTILINE_FOLDED:
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
488
						pos++;
37 by edam
- updated TODO and NEWS
489
						break;
490
					case MULTILINE_ENCODED:
36 by edam
- formatting: removed some double-indents on overrunning lines
491
						while( pos < buffer.limit() && (
492
							buffer.get( pos ) == ' ' ||
493
							buffer.get( pos ) == '\t' ) )
494
						{
495
							pos++;
496
						}
37 by edam
- updated TODO and NEWS
497
						break;
498
					default:
499
						// do nothing
500
					}
501
502
					// take us out of multi-line so that we can re-detect that
503
					// this line is a multi-line or not
504
					_parser_multiline_state = MULTILINE_NONE;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
505
				}
506
				else
507
				{
36 by edam
- formatting: removed some double-indents on overrunning lines
508
					// get name and params from line, and since we're not
509
					// parsing a subsequent line in a multi-line, this should
510
					// not fail, or it's an error
511
					name_and_params =
512
						extractNameAndParamsFromLine( buffer, line );
513
					if( name_and_params == null )
514
						throw new ParseException(
515
							R.string.error_vcf_malformed );
516
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
517
					// calculate how many chars to skip from beginning of line
518
					// so we skip the property "name:" part
36 by edam
- formatting: removed some double-indents on overrunning lines
519
					pos = buffer.position() + name_and_params.length() + 1;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
520
521
					// reset the saved multi-line state
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
522
					_parser_current_name_and_params = name_and_params;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
523
					_parser_buffered_value_so_far = "";
524
				}
525
36 by edam
- formatting: removed some double-indents on overrunning lines
526
				// get value from buffer, as raw bytes
527
				ByteBuffer value;
528
				value = ByteBuffer.wrap( buffer.array(), pos,
529
					buffer.limit() - pos );
530
1 by edam
Initial import
531
				// get parameter parts
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
532
				String[] name_param_parts = name_and_params.split( ";", -1 );
533
				for( int i = 0; i < name_param_parts.length; i++ )
534
					name_param_parts[ i ] = name_param_parts[ i ].trim();
1 by edam
Initial import
535
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
536
				// parse encoding parameter
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
537
				String encoding = checkParam( name_param_parts, "ENCODING" );
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
538
				if( encoding != null ) encoding = encoding.toUpperCase();
539
				if( encoding != null && !encoding.equals( "8BIT" ) &&
540
					!encoding.equals( "QUOTED-PRINTABLE" ) )
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
541
					//&& !encoding.equals( "BASE64" ) )
542
				{
543
					throw new ParseException( R.string.error_vcf_encoding );
544
				}
545
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
546
				// parse charset parameter
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
547
				String charset = checkParam( name_param_parts, "CHARSET" );
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
548
				if( charset != null ) charset = charset.toUpperCase();
549
				if( charset != null && !charset.equals( "US-ASCII" ) &&
36 by edam
- formatting: removed some double-indents on overrunning lines
550
					!charset.equals( "ASCII" ) &&
551
					!charset.equals( "UTF-8" ) )
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
552
				{
553
					throw new ParseException( R.string.error_vcf_charset );
554
				}
555
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
556
				// do unencoding (or default to a fake unencoding result with
557
				// the raw string)
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
558
				UnencodeResult unencoding_result = null;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
559
				if( encoding != null && encoding.equals( "QUOTED-PRINTABLE" ) )
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
560
					unencoding_result = unencodeQuotedPrintable( value );
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
561
//				else if( encoding != null && encoding.equals( "BASE64" ) )
34 by edam
- check for empty data "values" after parsing line parameters, so that we catch parameter errors (such as unknown encoding types).
562
//					unencoding_result = unencodeBase64( props[ 1 ], charset );
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
563
				if( unencoding_result != null ) {
564
					value = unencoding_result.getBuffer();
37 by edam
- updated TODO and NEWS
565
					if( unencoding_result.isAnotherLineRequired() )
566
						_parser_multiline_state = MULTILINE_ENCODED;
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
567
				}
568
569
				// convert 8-bit ASCII charset to US-ASCII
33 by edam
- fixed a couple of java string comparison checks
570
				if( charset == null || charset.equals( "ASCII" ) ) {
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
571
					value = transcodeAsciiToUtf8( value );
572
					charset = "UTF-8";
573
				}
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
574
575
				// process charset
36 by edam
- formatting: removed some double-indents on overrunning lines
576
				String string_value;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
577
				try {
36 by edam
- formatting: removed some double-indents on overrunning lines
578
					string_value = new String( value.array(), value.position(),
579
						value.limit() - value.position(), charset );
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
580
				} catch( UnsupportedEncodingException e ) {
581
					throw new ParseException( R.string.error_vcf_charset );
582
				}
583
37 by edam
- updated TODO and NEWS
584
				// for some entries that have semicolon-separated value parts,
585
				// check to see if the value ends in an escape character, which
586
				// indicates that we have a multi-line value
587
				if( ( name_param_parts[ 0 ].equals( "N" ) ||
588
					name_param_parts[ 0 ].equals( "ORG" ) ||
589
					name_param_parts[ 0 ].equals( "ADR" ) ) &&
590
					doesStringEndInAnEscapeChar( string_value ) )
591
				{
592
					_parser_multiline_state = MULTILINE_ESCAPED;
593
					string_value = string_value.substring( 0,
594
						string_value.length() - 1 );
595
				}
596
36 by edam
- formatting: removed some double-indents on overrunning lines
597
				// now we know whether we're in an encoding multi-line,
598
				// determine if we're in a v3 folded multi-line or not
37 by edam
- updated TODO and NEWS
599
				if( _parser_multiline_state == MULTILINE_NONE &&
600
					_version.equals( "3.0" ) && next_line_looks_folded )
601
				{
602
					_parser_multiline_state = MULTILINE_FOLDED;
603
				}
36 by edam
- formatting: removed some double-indents on overrunning lines
604
37 by edam
- updated TODO and NEWS
605
				// handle multi-lines by buffering them and parsing them when we
606
				// are processing the last line in a multi-line sequence
607
				if( _parser_multiline_state != MULTILINE_NONE ) {
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
608
					_parser_buffered_value_so_far += string_value;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
609
					return;
610
				}
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
611
				String complete_value =
37 by edam
- updated TODO and NEWS
612
					( _parser_buffered_value_so_far + string_value ).trim();
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
613
34 by edam
- check for empty data "values" after parsing line parameters, so that we catch parameter errors (such as unknown encoding types).
614
				// ignore empty values
615
				if( complete_value.length() < 1 ) return;
616
1 by edam
Initial import
617
				// parse some properties
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
618
				if( name_param_parts[ 0 ].equals( "N" ) )
619
					parseN( name_param_parts, complete_value );
620
				else if( name_param_parts[ 0 ].equals( "FN" ) )
621
					parseFN( name_param_parts, complete_value );
622
				else if( name_param_parts[ 0 ].equals( "ORG" ) )
623
					parseORG( name_param_parts, complete_value );
624
				else if( name_param_parts[ 0 ].equals( "TEL" ) )
625
					parseTEL( name_param_parts, complete_value );
626
				else if( name_param_parts[ 0 ].equals( "EMAIL" ) )
627
					parseEMAIL( name_param_parts, complete_value );
37 by edam
- updated TODO and NEWS
628
				else if( name_param_parts[ 0 ].equals( "ADR" ) )
629
					parseADR( name_param_parts, complete_value );
630
			}
631
		}
632
633
		private boolean doesStringEndInAnEscapeChar( String string )
634
		{
635
			// count the number of backslashes at the end of the string
636
			int count = 0;
637
			for( int a = string.length() - 1; a >= 0; a-- )
638
				if( string.charAt( a ) == '\\' )
639
					count++;
640
				else
641
					break;
642
643
			// if there are an even number of backslashes then the final one
644
			// doesn't count
645
			return ( count & 1 ) == 1;
646
		}
647
648
		private String[] splitValueBySemicolon( String value )
649
		{
650
			// split string in to parts by semicolon
651
			ArrayList< String > parts = new ArrayList< String >(
652
				Arrays.asList( value.split(  ";" ) ) );
653
654
			// go through parts
655
			for( int a = 0; a < parts.size(); a++ )
656
			{
657
				String str = parts.get( a );
658
659
				// look for parts that end in an escape character, but ignore
660
				// the final part. We've already detected escape chars at the
661
				// end of the final part in parseLine() and handled multi-lines
662
				// accordingly.
663
				if( a < parts.size() - 1 &&
664
					doesStringEndInAnEscapeChar( str ) )
665
				{
666
					// join the next part to this part and remove the next part
667
					parts.set( a, str.substring( 0, str.length() - 1 ) +
668
						';' + parts.get( a + 1 ) );
669
					parts.remove( a + 1 );
670
671
					// re-visit this part
672
					a--;
673
					continue;
674
				}
675
676
				// trim and replace string
677
				str = str.trim();
678
				parts.set( a, str );
679
			}
680
681
			String[] ret = new String[ parts.size() ];
682
			return parts.toArray( ret );
1 by edam
Initial import
683
		}
684
685
		private void parseN( String[] params, String value )
36 by edam
- formatting: removed some double-indents on overrunning lines
686
			throws ParseException, SkipContactException,
687
			AbortImportException
1 by edam
Initial import
688
		{
689
			// already got a better name?
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
690
			if( _name_level >= NAMELEVEL_N ) return;
1 by edam
Initial import
691
692
			// get name parts
37 by edam
- updated TODO and NEWS
693
			String[] name_parts = splitValueBySemicolon( value );
1 by edam
Initial import
694
695
			// build name
696
			value = "";
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
697
			if( name_parts.length > 1 && name_parts[ 1 ].length() > 0 )
698
				value += name_parts[ 1 ];
699
			if( name_parts.length > 0 && name_parts[ 0 ].length() > 0 )
700
				value += ( value.length() == 0? "" : " " ) + name_parts[ 0 ];
1 by edam
Initial import
701
702
			// set name
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
703
			setName( value );
704
			_name_level = NAMELEVEL_N;
1 by edam
Initial import
705
706
			// check now to see if we need to import this contact (to avoid
707
			// parsing the rest of the vCard unnecessarily)
708
			if( !isImportRequired( getName() ) )
709
				throw new SkipContactException();
710
		}
711
712
		private void parseFN( String[] params, String value )
36 by edam
- formatting: removed some double-indents on overrunning lines
713
			throws ParseException, SkipContactException
1 by edam
Initial import
714
		{
715
			// already got a better name?
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
716
			if( _name_level >= NAMELEVEL_FN ) return;
1 by edam
Initial import
717
718
			// set name
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
719
			setName( value );
720
			_name_level = NAMELEVEL_FN;
1 by edam
Initial import
721
		}
722
723
		private void parseORG( String[] params, String value )
36 by edam
- formatting: removed some double-indents on overrunning lines
724
			throws ParseException, SkipContactException
1 by edam
Initial import
725
		{
726
			// already got a better name?
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
727
			if( _name_level >= NAMELEVEL_ORG ) return;
1 by edam
Initial import
728
729
			// get org parts
37 by edam
- updated TODO and NEWS
730
			String[] org_parts = splitValueBySemicolon( value );
1 by edam
Initial import
731
732
			// build name
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
733
			if( org_parts.length > 1 && org_parts[ 0 ].length() == 0 )
734
				value = org_parts[ 1 ];
37 by edam
- updated TODO and NEWS
735
			else if( org_parts.length > 1 && org_parts[ 1 ].length() > 0 )
736
				value = org_parts[ 0 ] + ", " + org_parts[ 1 ];
1 by edam
Initial import
737
			else
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
738
				value = org_parts[ 0 ];
1 by edam
Initial import
739
740
			// set name
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
741
			setName( value );
742
			_name_level = NAMELEVEL_ORG;
1 by edam
Initial import
743
		}
744
745
		private void parseTEL( String[] params, String value )
36 by edam
- formatting: removed some double-indents on overrunning lines
746
			throws ParseException
1 by edam
Initial import
747
		{
748
			if( value.length() == 0 ) return;
749
750
			Set< String > types = extractTypes( params, Arrays.asList(
36 by edam
- formatting: removed some double-indents on overrunning lines
751
				"PREF", "HOME", "WORK", "VOICE", "FAX", "MSG", "CELL",
752
				"PAGER", "BBS", "MODEM", "CAR", "ISDN", "VIDEO" ) );
1 by edam
Initial import
753
754
			// here's the logic...
755
			boolean preferred = types.contains( "PREF" );
24 by edam
- import phone numbers even when they have no specified type (default to mobile)
756
			int type = PhonesColumns.TYPE_MOBILE;
1 by edam
Initial import
757
			if( types.contains( "VOICE" ) )
758
				if( types.contains( "WORK" ) )
24 by edam
- import phone numbers even when they have no specified type (default to mobile)
759
					type = PhonesColumns.TYPE_WORK;
1 by edam
Initial import
760
				else
24 by edam
- import phone numbers even when they have no specified type (default to mobile)
761
					type = PhonesColumns.TYPE_HOME;
1 by edam
Initial import
762
			else if( types.contains( "CELL" ) || types.contains( "VIDEO" ) )
24 by edam
- import phone numbers even when they have no specified type (default to mobile)
763
				type = PhonesColumns.TYPE_MOBILE;
1 by edam
Initial import
764
			if( types.contains( "FAX" ) )
765
				if( types.contains( "HOME" ) )
24 by edam
- import phone numbers even when they have no specified type (default to mobile)
766
					type = PhonesColumns.TYPE_FAX_HOME;
1 by edam
Initial import
767
				else
24 by edam
- import phone numbers even when they have no specified type (default to mobile)
768
					type = PhonesColumns.TYPE_FAX_WORK;
1 by edam
Initial import
769
			if( types.contains( "PAGER" ) )
24 by edam
- import phone numbers even when they have no specified type (default to mobile)
770
				type = PhonesColumns.TYPE_PAGER;
771
772
			// add phone number
773
			addPhone( value, type, preferred );
1 by edam
Initial import
774
		}
775
776
		public void parseEMAIL( String[] params, String value )
36 by edam
- formatting: removed some double-indents on overrunning lines
777
			throws ParseException
1 by edam
Initial import
778
		{
779
			if( value.length() == 0 ) return;
780
781
			Set< String > types = extractTypes( params, Arrays.asList(
36 by edam
- formatting: removed some double-indents on overrunning lines
782
				"PREF", "WORK", "HOME", "INTERNET" ) );
1 by edam
Initial import
783
37 by edam
- updated TODO and NEWS
784
			// add email address
1 by edam
Initial import
785
			boolean preferred = types.contains( "PREF" );
786
			if( types.contains( "WORK" ) )
787
				addEmail( value, Contacts.ContactMethods.TYPE_WORK, preferred );
788
			else
789
				addEmail( value, Contacts.ContactMethods.TYPE_HOME, preferred );
790
		}
791
37 by edam
- updated TODO and NEWS
792
		private void parseADR( String[] params, String value )
793
			throws ParseException, SkipContactException
794
		{
795
			// get address parts
796
			String[] adr_parts = splitValueBySemicolon( value );
797
798
			// build address
799
			value = "";
800
			for( int a = 0; a < adr_parts.length; a++ ) {
801
				if( value.length() > 0 ) value += "\n";
802
				value += adr_parts[ a ].trim();
803
			}
804
805
			Set< String > types = extractTypes( params, Arrays.asList(
806
				"PREF", "WORK", "HOME", "INTERNET" ) );
807
808
			// add address
809
			if( types.contains( "WORK" ) )
810
				addAddress( value, Contacts.ContactMethods.TYPE_WORK );
811
			else
812
				addAddress( value, Contacts.ContactMethods.TYPE_HOME);
813
		}
814
1 by edam
Initial import
815
		public void finaliseParsing()
36 by edam
- formatting: removed some double-indents on overrunning lines
816
			throws ParseException, SkipContactException,
817
			AbortImportException
1 by edam
Initial import
818
		{
819
			// missing version (and data is present)
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
820
			if( _version == null && _buffers != null )
1 by edam
Initial import
821
				throw new ParseException( R.string.error_vcf_malformed );
822
823
			//  missing name properties?
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
824
			if( _name_level == NAMELEVEL_NONE )
1 by edam
Initial import
825
				throw new ParseException( R.string.error_vcf_noname );
826
827
			// check if we should import this one? If we've already got an 'N'-
828
			// type name, this will already have been done by parseN() so we
829
			// mustn't do this here (or it could prompt twice!)
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
830
			if( _name_level < NAMELEVEL_N && !isImportRequired( getName() ) )
1 by edam
Initial import
831
				throw new SkipContactException();
832
		}
833
834
		private String checkParam( String[] params, String name )
835
		{
35 by edam
- accept parameters that are quoted (this doesn't appear to be part of the standards AFAICT, but Evolution apparently quotes parameter values)
836
			Pattern p = Pattern.compile(
36 by edam
- formatting: removed some double-indents on overrunning lines
837
				"^" + name + "[ \\t]*=[ \\t]*(\"?)(.*)\\1$" );
1 by edam
Initial import
838
			for( int i = 0; i < params.length; i++ ) {
839
				Matcher m = p.matcher( params[ i ] );
840
				if( m.matches() )
35 by edam
- accept parameters that are quoted (this doesn't appear to be part of the standards AFAICT, but Evolution apparently quotes parameter values)
841
					return m.group( 2 );
1 by edam
Initial import
842
			}
843
			return null;
844
		}
845
846
		private Set< String > extractTypes( String[] params,
36 by edam
- formatting: removed some double-indents on overrunning lines
847
			List< String > valid_types )
1 by edam
Initial import
848
		{
849
			HashSet< String > types = new HashSet< String >();
850
851
			// get 3.0-style TYPE= param
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
852
			String type_param;
853
			if( ( type_param = checkParam( params, "TYPE" ) ) != null ) {
854
				String[] parts = type_param.split( "," );
855
				for( int i = 0; i < parts.length; i++ )
856
					if( valid_types.contains( parts[ i ] ) )
857
						types.add( parts[ i ] );
1 by edam
Initial import
858
			}
859
860
			// get 2.1-style type param
861
			if( _version.equals( "2.1" ) ) {
862
				for( int i = 1; i < params.length; i++ )
25 by edam
- fixed bug where parts[0] was assumed to exists after calling split()
863
					if( valid_types.contains( params[ i ] ) )
1 by edam
Initial import
864
						types.add( params[ i ] );
865
			}
866
867
			return types;
868
		}
869
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
870
		private UnencodeResult unencodeQuotedPrintable( ByteBuffer in )
1 by edam
Initial import
871
		{
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
872
			boolean another = false;
873
36 by edam
- formatting: removed some double-indents on overrunning lines
874
			// unencode quoted-printable encoding, as per RFC1521 section 5.1
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
875
			byte[] out = new byte[ in.limit() - in.position() ];
1 by edam
Initial import
876
			int j = 0;
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
877
			for( int i = in.position(); i < in.limit(); i++ )
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
878
			{
879
				// get next char and process...
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
880
				byte ch = in.array()[ i ];
881
				if( ch == '=' && i < in.limit() - 2 )
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
882
				{
883
					// we found a =XX format byte, add it
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
884
					out[ j ] = (byte)(
36 by edam
- formatting: removed some double-indents on overrunning lines
885
							Character.digit( in.array()[ i + 1 ], 16 ) * 16 +
886
							Character.digit( in.array()[ i + 2 ], 16 ) );
1 by edam
Initial import
887
					i += 2;
888
				}
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
889
				else if( ch == '=' && i == in.limit() - 1 )
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
890
				{
891
					// we found a '=' at the end of a line signifying a multi-
892
					// line string, so we don't add it.
893
					another = true;
894
					continue;
895
				}
1 by edam
Initial import
896
				else
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
897
					// just a normal char...
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
898
					out[ j ] = (byte)ch;
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
899
				j++;
1 by edam
Initial import
900
			}
18 by edam
- changed case on charset and encoding warning strings (it looked bad)
901
22 by edam
- read vCard files in as raw bytes now and convert to string only tentatively to check for version no.s and property names and params
902
			return new UnencodeResult( another, ByteBuffer.wrap( out, 0, j ) );
903
		}
904
905
		private ByteBuffer transcodeAsciiToUtf8( ByteBuffer in )
906
		{
907
			// transcode
908
			byte[] out = new byte[ ( in.limit() - in.position() ) * 2 ];
909
			int j = 0;
910
			for( int a = in.position(); a < in.limit(); a++ )
911
			{
912
				// if char is < 127, keep it as-is
913
				if( in.array()[ a ] >= 0 )
914
					out[ j++ ] = in.array()[ a ];
915
916
				// else, convert it to UTF-8
917
				else {
918
					int b = 0xff & (int)in.array()[ a ];
919
					out[ j++ ] = (byte)( 0xc0 | ( b >> 6 ) );
920
					out[ j++ ] = (byte)( 0x80 | ( b & 0x3f ) );
921
				}
922
			}
923
924
			return ByteBuffer.wrap( out, 0, j );
1 by edam
Initial import
925
		}
926
	}
927
}